diff --git "a/profile_trace/iteration_14848/rank5_trace.json" "b/profile_trace/iteration_14848/rank5_trace.json" new file mode 100644--- /dev/null +++ "b/profile_trace/iteration_14848/rank5_trace.json" @@ -0,0 +1,68774 @@ + +{ + "schemaVersion": 1, + "deviceProperties": [ + { + "id": 0, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 1, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 2, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 3, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 4, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 5, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 6, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + }, + { + "id": 7, "name": "NVIDIA H200", "totalGlobalMem": 149631664128, + "computeMajor": 9, "computeMinor": 0, + "maxThreadsPerBlock": 1024, "maxThreadsPerMultiprocessor": 2048, + "regsPerBlock": 65536, "warpSize": 32, + "sharedMemPerBlock": 49152, "numSms": 132 + , "regsPerMultiprocessor": 65536, "sharedMemPerBlockOptin": 232448, "sharedMemPerMultiprocessor": 233472 + } + ], + "cupti_version": 22, + "cuda_runtime_version": 12040, + "cuda_driver_version": 12080, + "distributedInfo": {"backend": "nccl", "rank": 5, "world_size": 8, "pg_count": 1, "pg_config": [{"pg_name": "0", "pg_desc": "default_pg", "backend_config": "cuda:nccl", "pg_size": 8, "ranks": [0, 1, 2, 3, 4, 5, 6, 7]}], "nccl_version": "2.21.5"}, + "record_shapes": 1, + "trace_id": "2EF996EAF19D4A139CF0CD01BEBEED20", + "traceEvents": [ + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: DivBackward0", "pid": 2070552, "tid": 2107648, + "ts": 5333367618699.378, "dur": 132.438, + "args": { + "External id": 293377,"Record function id": 0, "Sequence number": 1209231, "Fwd thread id": 1, "Ev Idx": 0 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "DivBackward0", "pid": 2070552, "tid": 2107648, + "ts": 5333367618718.611, "dur": 104.497, + "args": { + "External id": 293378,"Sequence number": 1209231, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 1 + } + }, + { + "ph": "f", "id": 1, "pid": 2070552, "tid": 2107648, "ts": 5333367618718.611, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2070552, "tid": 2107648, + "ts": 5333367618727.865, "dur": 93.085, + "args": { + "External id": 293379,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 2 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: AddBackward0", "pid": 2070552, "tid": 2107648, + "ts": 5333367618846.148, "dur": 191.748, + "args": { + "External id": 293380,"Record function id": 0, "Ev Idx": 3 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward", "pid": 2070552, "tid": 2107648, + "ts": 5333367618917.110, "dur": 105.339, + "args": { + "External id": 293381,"Record function id": 0, "Ev Idx": 4 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.31", "pid": 2070552, "tid": 2107648, + "ts": 5333367618953.767, "dur": 58.228, + "args": { + "External id": 293382,"Record function id": 0, "Ev Idx": 5 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "AddBackward0", "pid": 2070552, "tid": 2107648, + "ts": 5333367619027.645, "dur": 2.018, + "args": { + "External id": 293383,"Sequence number": 1209230, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 6 + } + }, + { + "ph": "f", "id": 2, "pid": 2070552, "tid": 2107648, "ts": 5333367619027.645, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearListNetFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333367619046.002, "dur": 77461.692, + "args": { + "External id": 293384,"Record function id": 0, "Sequence number": 1209229, "Fwd thread id": 1, "Ev Idx": 7 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearListNetFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333367619051.312, "dur": 77395.443, + "args": { + "External id": 293385,"Sequence number": 1209229, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 8 + } + }, + { + "ph": "f", "id": 3, "pid": 2070552, "tid": 2107648, "ts": 5333367619051.312, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367619093.226, "dur": 4.428, + "args": { + "External id": 293386,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333367619101.994, "dur": 77198.370, + "args": { + "External id": 293387,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 10 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333367619105.021, "dur": 77194.861, + "args": { + "External id": 293388,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 11 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367619108.833, "dur": 9.330, + "args": { + "External id": 293389,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 12 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333367619122.802, "dur": 77174.701, + "args": { + "External id": 293390,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 13 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2070552, "tid": 2107648, + "ts": 5333367696307.479, "dur": 0.734, + "args": { + "External id": 293391,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 14 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2070552, "tid": 2107648, + "ts": 5333367696310.843, "dur": 4.306, + "args": { + "External id": 293392,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 15 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2070552, "tid": 2107648, + "ts": 5333367696312.936, "dur": 1.798, + "args": { + "External id": 293393,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 16 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2070552, "tid": 2107648, + "ts": 5333367696325.247, "dur": 42.552, + "args": { + "External id": 293394,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 17 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2070552, "tid": 2107648, + "ts": 5333367696381.768, "dur": 56.786, + "args": { + "External id": 293395,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 18 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2070552, "tid": 2107648, + "ts": 5333367696384.810, "dur": 53.558, + "args": { + "External id": 293396,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 19 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2070552, "tid": 2107648, + "ts": 5333367696387.377, "dur": 50.652, + "args": { + "External id": 293397,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 20 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333367696454.428, "dur": 41.052, + "args": { + "External id": 293398,"Record function id": 0, "Concrete Inputs": ["", "15", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], []], "Ev Idx": 21 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333367696455.639, "dur": 39.538, + "args": { + "External id": 293399,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], [], []], "Ev Idx": 22 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367696463.608, "dur": 7.845, + "args": { + "External id": 293400,"Record function id": 0, "Concrete Inputs": ["[32000, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 23 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333367696472.881, "dur": 21.723, + "args": { + "External id": 293401,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 24 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367696525.795, "dur": 15.181, + "args": { + "External id": 293402,"Record function id": 0, "Ev Idx": 25 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367696529.556, "dur": 9.640, + "args": { + "External id": 293403,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 26 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367696533.218, "dur": 5.413, + "args": { + "External id": 293404,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 27 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367696534.123, "dur": 4.356, + "args": { + "External id": 293405,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 28 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2070552, "tid": 2107648, + "ts": 5333367696545.328, "dur": 23.303, + "args": { + "External id": 293406,"Record function id": 0, "Sequence number": 1209228, "Fwd thread id": 1, "Ev Idx": 29 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2070552, "tid": 2107648, + "ts": 5333367696547.105, "dur": 18.919, + "args": { + "External id": 293407,"Sequence number": 1209228, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 30 + } + }, + { + "ph": "f", "id": 4, "pid": 2070552, "tid": 2107648, "ts": 5333367696547.105, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2070552, "tid": 2107648, + "ts": 5333367696554.879, "dur": 10.857, + "args": { + "External id": 293408,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 31 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367696558.953, "dur": 6.560, + "args": { + "External id": 293409,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 32 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: FusedLinearCrossEntropyFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333367696573.167, "dur": 246.460, + "args": { + "External id": 293410,"Record function id": 0, "Sequence number": 1209227, "Fwd thread id": 1, "Ev Idx": 33 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333367696584.069, "dur": 226.651, + "args": { + "External id": 293411,"Sequence number": 1209227, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 34 + } + }, + { + "ph": "f", "id": 5, "pid": 2070552, "tid": 2107648, "ts": 5333367696584.069, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367696660.606, "dur": 6.593, + "args": { + "External id": 293412,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 35 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333367696670.323, "dur": 59.820, + "args": { + "External id": 293413,"Record function id": 0, "Concrete Inputs": ["", "", "6", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 36 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333367696671.605, "dur": 58.309, + "args": { + "External id": 293414,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], []], "Ev Idx": 37 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367696673.457, "dur": 7.320, + "args": { + "External id": 293415,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 38 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333367696681.711, "dur": 47.767, + "args": { + "External id": 293416,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 39 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::lift_fresh", "pid": 2070552, "tid": 2107648, + "ts": 5333367696732.506, "dur": 0.568, + "args": { + "External id": 293417,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 40 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach_", "pid": 2070552, "tid": 2107648, + "ts": 5333367696734.541, "dur": 4.395, + "args": { + "External id": 293418,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 41 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach_", "pid": 2070552, "tid": 2107648, + "ts": 5333367696737.950, "dur": 0.806, + "args": { + "External id": 293419,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 42 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2070552, "tid": 2107648, + "ts": 5333367696742.247, "dur": 18.550, + "args": { + "External id": 293420,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 43 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2070552, "tid": 2107648, + "ts": 5333367696765.311, "dur": 38.114, + "args": { + "External id": 293421,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 44 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2070552, "tid": 2107648, + "ts": 5333367696766.401, "dur": 36.863, + "args": { + "External id": 293422,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 45 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2070552, "tid": 2107648, + "ts": 5333367696769.436, "dur": 33.484, + "args": { + "External id": 293423,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 46 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367696827.843, "dur": 9.733, + "args": { + "External id": 293424,"Record function id": 0, "Ev Idx": 47 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367696831.366, "dur": 5.614, + "args": { + "External id": 293425,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 48 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367696833.203, "dur": 2.607, + "args": { + "External id": 293426,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 49 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367696833.812, "dur": 1.811, + "args": { + "External id": 293427,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 50 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: ViewBackward0", "pid": 2070552, "tid": 2107648, + "ts": 5333367696841.620, "dur": 47.304, + "args": { + "External id": 293428,"Record function id": 0, "Sequence number": 1209226, "Fwd thread id": 1, "Ev Idx": 51 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "ViewBackward0", "pid": 2070552, "tid": 2107648, + "ts": 5333367696842.913, "dur": 7.431, + "args": { + "External id": 293429,"Sequence number": 1209226, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 52 + } + }, + { + "ph": "f", "id": 6, "pid": 2070552, "tid": 2107648, "ts": 5333367696842.913, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reshape", "pid": 2070552, "tid": 2107648, + "ts": 5333367696844.773, "dur": 5.317, + "args": { + "External id": 293430,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 53 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367696847.669, "dur": 2.262, + "args": { + "External id": 293431,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 54 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2070552, "tid": 2107648, + "ts": 5333367696855.355, "dur": 26.015, + "args": { + "External id": 293432,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048], []], "Ev Idx": 55 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333367696894.555, "dur": 423.745, + "args": { + "External id": 293433,"Record function id": 0, "Sequence number": 1209225, "Fwd thread id": 1, "Ev Idx": 56 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333367696895.896, "dur": 409.661, + "args": { + "External id": 293434,"Sequence number": 1209225, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 57 + } + }, + { + "ph": "f", "id": 7, "pid": 2070552, "tid": 2107648, "ts": 5333367696895.896, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5333367697076.225, "dur": 50.412, + "args": { + "External id": 293435,"kernel_hash": "cuukjsp6rxz3jug6vt6aydazifg4agx2qo5hdktyvypms7xloy55", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/uu/cuukjsp6rxz3jug6vt6aydazifg4agx2qo5hdktyvypms7xloy55.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 58 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_0", "pid": 2070552, "tid": 2107648, + "ts": 5333367697165.091, "dur": 53.665, + "args": { + "External id": 293436,"kernel_hash": "ca6xizp2qkfxzkredwq3zuqbocaripz3jyqqq6oyyli4d7qxaxem", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/a6/ca6xizp2qkfxzkredwq3zuqbocaripz3jyqqq6oyyli4d7qxaxem.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 59 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_1", "pid": 2070552, "tid": 2107648, + "ts": 5333367697247.082, "dur": 24.608, + "args": { + "External id": 293437,"kernel_hash": "c37t5saqik2yxxap5wjzy6t6ncvdgty24ktri3fnll7y5tjs566n", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/37/c37t5saqik2yxxap5wjzy6t6ncvdgty24ktri3fnll7y5tjs566n.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 60 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367697331.206, "dur": 11.090, + "args": { + "External id": 293438,"Record function id": 0, "Ev Idx": 61 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367697334.023, "dur": 7.501, + "args": { + "External id": 293439,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 62 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367697336.802, "dur": 3.969, + "args": { + "External id": 293440,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 63 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367697338.056, "dur": 2.554, + "args": { + "External id": 293441,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 64 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333367697346.440, "dur": 3032.472, + "args": { + "External id": 293442,"Record function id": 0, "Ev Idx": 65 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.31)", "pid": 2070552, "tid": 2107648, + "ts": 5333367697376.209, "dur": 1098.759, + "args": { + "External id": 293443,"Record function id": 0, "Ev Idx": 66 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.30", "pid": 2070552, "tid": 2107648, + "ts": 5333367697402.626, "dur": 1063.107, + "args": { + "External id": 293444,"Record function id": 0, "Ev Idx": 67 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.30)", "pid": 2070552, "tid": 2107648, + "ts": 5333367697420.550, "dur": 1029.990, + "args": { + "External id": 293445,"Record function id": 0, "Ev Idx": 68 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367697533.580, "dur": 6.967, + "args": { + "External id": 293446,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 69 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333367697561.471, "dur": 47.051, + "args": { + "External id": 293447,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 70 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367697568.952, "dur": 7.288, + "args": { + "External id": 293448,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 71 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367697577.568, "dur": 0.571, + "args": { + "External id": 293449,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 72 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367697578.949, "dur": 2.145, + "args": { + "External id": 293450,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 73 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367697585.800, "dur": 0.682, + "args": { + "External id": 293451,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 74 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367697587.685, "dur": 0.587, + "args": { + "External id": 293452,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 75 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367697588.947, "dur": 0.363, + "args": { + "External id": 293453,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 76 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367697594.182, "dur": 0.480, + "args": { + "External id": 293454,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 77 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367697595.134, "dur": 0.558, + "args": { + "External id": 293455,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 78 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367697598.476, "dur": 2.473, + "args": { + "External id": 293456,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 79 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333367697658.875, "dur": 47.697, + "args": { + "External id": 293457,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 80 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5333367697752.770, "dur": 131.096, + "args": { + "External id": 293458,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 81 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367697766.946, "dur": 7.102, + "args": { + "External id": 293459,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 82 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5333367697781.287, "dur": 15.528, + "args": { + "External id": 293460,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 83 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5333367697786.494, "dur": 9.833, + "args": { + "External id": 293461,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 84 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367697791.863, "dur": 2.765, + "args": { + "External id": 293462,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 85 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333367697804.119, "dur": 31.608, + "args": { + "External id": 293463,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 86 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367697805.483, "dur": 0.720, + "args": { + "External id": 293464,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 87 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367697809.032, "dur": 0.595, + "args": { + "External id": 293465,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 88 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367697810.447, "dur": 2.416, + "args": { + "External id": 293466,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 89 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367697813.430, "dur": 0.498, + "args": { + "External id": 293467,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 90 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367697816.341, "dur": 0.775, + "args": { + "External id": 293468,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 91 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367697819.803, "dur": 0.354, + "args": { + "External id": 293469,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 92 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367697822.485, "dur": 0.313, + "args": { + "External id": 293470,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 93 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367697823.489, "dur": 2.385, + "args": { + "External id": 293471,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 94 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367697828.782, "dur": 0.343, + "args": { + "External id": 293472,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 95 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333367697849.341, "dur": 26.199, + "args": { + "External id": 293473,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 96 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333367697954.327, "dur": 370.981, + "args": { + "External id": 293474,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 97 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333367697988.679, "dur": 330.496, + "args": { + "External id": 293475,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 98, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5333367698000.500, "dur": 311.365, + "args": { + "External id": 293476,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 99 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333367698357.089, "dur": 2.805, + "args": { + "External id": 293477,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 100, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333367698484.594, "dur": 1865.190, + "args": { + "External id": 293478,"Sequence number": 1209224, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 101 + } + }, + { + "ph": "f", "id": 8, "pid": 2070552, "tid": 2107648, "ts": 5333367698484.594, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367698658.130, "dur": 138.660, + "args": { + "External id": 293479,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5333367698851.022, "dur": 52.311, + "args": { + "External id": 293480,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5333367698925.054, "dur": 52.992, + "args": { + "External id": 293481,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367698987.800, "dur": 36.769, + "args": { + "External id": 293482,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367699031.038, "dur": 47.483, + "args": { + "External id": 293483,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367699085.594, "dur": 28.493, + "args": { + "External id": 293484,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367699124.007, "dur": 60.784, + "args": { + "External id": 293485,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5333367699224.032, "dur": 35.253, + "args": { + "External id": 293486,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5333367699282.793, "dur": 31.625, + "args": { + "External id": 293487,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333367699339.891, "dur": 21.522, + "args": { + "External id": 293488,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333367699381.899, "dur": 16.307, + "args": { + "External id": 293489,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367699410.374, "dur": 36.725, + "args": { + "External id": 293490,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367699450.948, "dur": 33.648, + "args": { + "External id": 293491,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5333367699518.003, "dur": 240.340, + "args": { + "External id": 293492,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367699615.809, "dur": 41.334, + "args": { + "External id": 293493,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367699660.779, "dur": 2.531, + "args": { + "External id": 293494,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333367699798.069, "dur": 28.641, + "args": { + "External id": 293495,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333367699840.626, "dur": 15.366, + "args": { + "External id": 293496,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367699866.030, "dur": 46.837, + "args": { + "External id": 293497,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367699918.454, "dur": 35.624, + "args": { + "External id": 293498,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367699960.685, "dur": 23.035, + "args": { + "External id": 293499,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367699988.395, "dur": 29.490, + "args": { + "External id": 293500,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367700025.500, "dur": 21.779, + "args": { + "External id": 293501,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367700053.724, "dur": 29.751, + "args": { + "External id": 293502,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5333367700108.323, "dur": 26.368, + "args": { + "External id": 293503,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5333367700157.796, "dur": 46.886, + "args": { + "External id": 293504,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333367700229.432, "dur": 19.064, + "args": { + "External id": 293505,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333367700266.539, "dur": 14.605, + "args": { + "External id": 293506,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5333367700296.276, "dur": 17.172, + "args": { + "External id": 293507,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367700401.416, "dur": 16.667, + "args": { + "External id": 293508,"Record function id": 0, "Ev Idx": 131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367700404.708, "dur": 11.954, + "args": { + "External id": 293509,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367700409.316, "dur": 6.336, + "args": { + "External id": 293510,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367700410.992, "dur": 4.557, + "args": { + "External id": 293511,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367700422.032, "dur": 4.776, + "args": { + "External id": 293512,"Record function id": 0, "Ev Idx": 135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367700423.544, "dur": 2.852, + "args": { + "External id": 293513,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367700424.428, "dur": 1.490, + "args": { + "External id": 293514,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367700425.121, "dur": 0.709, + "args": { + "External id": 293515,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367700430.142, "dur": 4.439, + "args": { + "External id": 293516,"Record function id": 0, "Ev Idx": 139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367700431.333, "dur": 2.834, + "args": { + "External id": 293517,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367700431.912, "dur": 1.851, + "args": { + "External id": 293518,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367700432.746, "dur": 0.906, + "args": { + "External id": 293519,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367700437.676, "dur": 4.318, + "args": { + "External id": 293520,"Record function id": 0, "Ev Idx": 143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367700439.053, "dur": 2.539, + "args": { + "External id": 293521,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367700439.893, "dur": 1.279, + "args": { + "External id": 293522,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367700440.438, "dur": 0.643, + "args": { + "External id": 293523,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367700444.956, "dur": 5.298, + "args": { + "External id": 293524,"Record function id": 0, "Ev Idx": 147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367700447.376, "dur": 2.462, + "args": { + "External id": 293525,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367700447.811, "dur": 1.434, + "args": { + "External id": 293526,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367700448.275, "dur": 0.898, + "args": { + "External id": 293527,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367700453.168, "dur": 4.651, + "args": { + "External id": 293528,"Record function id": 0, "Ev Idx": 151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367700454.759, "dur": 2.644, + "args": { + "External id": 293529,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367700455.749, "dur": 1.252, + "args": { + "External id": 293530,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367700456.298, "dur": 0.629, + "args": { + "External id": 293531,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367700460.849, "dur": 8.352, + "args": { + "External id": 293532,"Record function id": 0, "Ev Idx": 155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367700462.069, "dur": 6.729, + "args": { + "External id": 293533,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367700462.700, "dur": 5.658, + "args": { + "External id": 293534,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367700465.663, "dur": 2.624, + "args": { + "External id": 293535,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367700472.203, "dur": 4.380, + "args": { + "External id": 293536,"Record function id": 0, "Ev Idx": 159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367700473.337, "dur": 2.846, + "args": { + "External id": 293537,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367700474.026, "dur": 1.564, + "args": { + "External id": 293538,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367700474.822, "dur": 0.696, + "args": { + "External id": 293539,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367700481.122, "dur": 4.202, + "args": { + "External id": 293540,"Record function id": 0, "Ev Idx": 163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367700482.346, "dur": 2.571, + "args": { + "External id": 293541,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367700482.941, "dur": 1.543, + "args": { + "External id": 293542,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367700483.611, "dur": 0.799, + "args": { + "External id": 293543,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333367700489.169, "dur": 36445.391, + "args": { + "External id": 293544,"Record function id": 0, "Sequence number": 1209223, "Fwd thread id": 1, "Ev Idx": 167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333367700490.570, "dur": 36433.287, + "args": { + "External id": 293545,"Sequence number": 1209223, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 168 + } + }, + { + "ph": "f", "id": 9, "pid": 2070552, "tid": 2107648, "ts": 5333367700490.570, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.31)", "pid": 2070552, "tid": 2107648, + "ts": 5333367700525.180, "dur": 47.507, + "args": { + "External id": 293546,"Record function id": 0, "Ev Idx": 169 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.31)", "pid": 2070552, "tid": 2107648, + "ts": 5333367700580.612, "dur": 134.909, + "args": { + "External id": 293547,"Record function id": 0, "Ev Idx": 170 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.31)", "pid": 2070552, "tid": 2107648, + "ts": 5333367700725.418, "dur": 36186.174, + "args": { + "External id": 293548,"Record function id": 0, "Ev Idx": 171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367700789.242, "dur": 8.387, + "args": { + "External id": 293549,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367700808.880, "dur": 5.391, + "args": { + "External id": 293550,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333367700832.895, "dur": 35143.679, + "args": { + "External id": 293551,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333367700849.891, "dur": 35113.449, + "args": { + "External id": 293552,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367700902.904, "dur": 4.051, + "args": { + "External id": 293553,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333367700919.511, "dur": 34998.045, + "args": { + "External id": 293554,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333367700922.071, "dur": 34994.405, + "args": { + "External id": 293555,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367700927.320, "dur": 5.101, + "args": { + "External id": 293556,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333367700934.113, "dur": 34977.679, + "args": { + "External id": 293557,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367736095.281, "dur": 12.131, + "args": { + "External id": 293558,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367736099.238, "dur": 7.595, + "args": { + "External id": 293559,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333367736144.523, "dur": 386.328, + "args": { + "External id": 293560,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333367736195.791, "dur": 329.437, + "args": { + "External id": 293561,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 184, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5333367736213.261, "dur": 306.071, + "args": { + "External id": 293562,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333367736556.003, "dur": 2.437, + "args": { + "External id": 293563,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 186, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367736660.069, "dur": 8.889, + "args": { + "External id": 293564,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367736724.795, "dur": 1.443, + "args": { + "External id": 293565,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367736747.401, "dur": 1.455, + "args": { + "External id": 293566,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367736762.066, "dur": 1.478, + "args": { + "External id": 293567,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367736774.277, "dur": 3.159, + "args": { + "External id": 293568,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367736790.193, "dur": 1.167, + "args": { + "External id": 293569,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367736806.938, "dur": 1.347, + "args": { + "External id": 293570,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367736822.196, "dur": 3.281, + "args": { + "External id": 293571,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367736835.723, "dur": 3.549, + "args": { + "External id": 293572,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333367736951.080, "dur": 3066.533, + "args": { + "External id": 293573,"Record function id": 0, "Ev Idx": 196 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.30)", "pid": 2070552, "tid": 2107648, + "ts": 5333367736972.972, "dur": 1177.978, + "args": { + "External id": 293574,"Record function id": 0, "Ev Idx": 197 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.30)", "pid": 2070552, "tid": 2107648, + "ts": 5333367736990.142, "dur": 400.564, + "args": { + "External id": 293575,"Record function id": 0, "Ev Idx": 198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367737079.408, "dur": 4.071, + "args": { + "External id": 293576,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367737086.901, "dur": 0.959, + "args": { + "External id": 293577,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367737089.468, "dur": 1.043, + "args": { + "External id": 293578,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367737094.434, "dur": 1.086, + "args": { + "External id": 293579,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367737099.146, "dur": 1.163, + "args": { + "External id": 293580,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367737101.728, "dur": 0.771, + "args": { + "External id": 293581,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367737103.791, "dur": 5.352, + "args": { + "External id": 293582,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367737112.867, "dur": 0.895, + "args": { + "External id": 293583,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367737115.723, "dur": 0.976, + "args": { + "External id": 293584,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367737117.994, "dur": 0.903, + "args": { + "External id": 293585,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333367737139.774, "dur": 212.794, + "args": { + "External id": 293586,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333367737160.789, "dur": 186.021, + "args": { + "External id": 293587,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367737199.822, "dur": 16.936, + "args": { + "External id": 293588,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333367737223.151, "dur": 89.329, + "args": { + "External id": 293589,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333367737226.342, "dur": 85.689, + "args": { + "External id": 293590,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367737230.904, "dur": 10.018, + "args": { + "External id": 293591,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333367737243.313, "dur": 68.015, + "args": { + "External id": 293592,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 215 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.29", "pid": 2070552, "tid": 2107648, + "ts": 5333367737491.825, "dur": 651.533, + "args": { + "External id": 293593,"Record function id": 0, "Ev Idx": 216 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.29)", "pid": 2070552, "tid": 2107648, + "ts": 5333367737512.525, "dur": 618.100, + "args": { + "External id": 293594,"Record function id": 0, "Ev Idx": 217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367737579.912, "dur": 5.269, + "args": { + "External id": 293595,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333367737602.314, "dur": 76.367, + "args": { + "External id": 293596,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367737606.650, "dur": 1.688, + "args": { + "External id": 293597,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367737610.539, "dur": 2.752, + "args": { + "External id": 293598,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367737615.927, "dur": 0.409, + "args": { + "External id": 293599,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367737656.456, "dur": 0.530, + "args": { + "External id": 293600,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367737658.392, "dur": 0.677, + "args": { + "External id": 293601,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367737661.409, "dur": 2.050, + "args": { + "External id": 293602,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367737666.528, "dur": 0.423, + "args": { + "External id": 293603,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367737668.870, "dur": 0.581, + "args": { + "External id": 293604,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367737671.540, "dur": 0.382, + "args": { + "External id": 293605,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333367737690.972, "dur": 41.184, + "args": { + "External id": 293606,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5333367737769.845, "dur": 112.418, + "args": { + "External id": 293607,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367737780.884, "dur": 6.336, + "args": { + "External id": 293608,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5333367737792.890, "dur": 10.265, + "args": { + "External id": 293609,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5333367737797.982, "dur": 4.736, + "args": { + "External id": 293610,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367737800.798, "dur": 0.610, + "args": { + "External id": 293611,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333367737810.151, "dur": 28.657, + "args": { + "External id": 293612,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367737813.332, "dur": 0.487, + "args": { + "External id": 293613,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367737815.065, "dur": 0.446, + "args": { + "External id": 293614,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367737818.458, "dur": 0.342, + "args": { + "External id": 293615,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367737820.929, "dur": 0.391, + "args": { + "External id": 293616,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367737822.011, "dur": 2.043, + "args": { + "External id": 293617,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367737825.846, "dur": 0.286, + "args": { + "External id": 293618,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367737826.948, "dur": 2.575, + "args": { + "External id": 293619,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367737832.190, "dur": 0.381, + "args": { + "External id": 293620,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367737833.335, "dur": 0.370, + "args": { + "External id": 293621,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333367737851.925, "dur": 21.884, + "args": { + "External id": 293622,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333367737929.926, "dur": 127.637, + "args": { + "External id": 293623,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333367737962.616, "dur": 91.726, + "args": { + "External id": 293624,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 247, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5333367737971.903, "dur": 76.603, + "args": { + "External id": 293625,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333367738075.889, "dur": 2.051, + "args": { + "External id": 293626,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 249, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333367738158.577, "dur": 1834.187, + "args": { + "External id": 293627,"Sequence number": 1209222, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 250 + } + }, + { + "ph": "f", "id": 10, "pid": 2070552, "tid": 2107648, "ts": 5333367738158.577, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367738315.780, "dur": 116.091, + "args": { + "External id": 293628,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5333367738476.819, "dur": 48.270, + "args": { + "External id": 293629,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5333367738548.676, "dur": 49.788, + "args": { + "External id": 293630,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367738612.027, "dur": 76.147, + "args": { + "External id": 293631,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367738698.912, "dur": 47.746, + "args": { + "External id": 293632,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367738755.192, "dur": 27.528, + "args": { + "External id": 293633,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367738793.712, "dur": 42.209, + "args": { + "External id": 293634,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5333367738871.283, "dur": 36.638, + "args": { + "External id": 293635,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5333367738931.606, "dur": 35.190, + "args": { + "External id": 293636,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333367738989.005, "dur": 20.503, + "args": { + "External id": 293637,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333367739022.988, "dur": 13.410, + "args": { + "External id": 293638,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367739048.365, "dur": 28.591, + "args": { + "External id": 293639,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367739080.129, "dur": 31.643, + "args": { + "External id": 293640,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5333367739140.839, "dur": 242.663, + "args": { + "External id": 293641,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367739259.367, "dur": 8.029, + "args": { + "External id": 293642,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367739270.503, "dur": 4.853, + "args": { + "External id": 293643,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333367739430.713, "dur": 29.272, + "args": { + "External id": 293644,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333367739473.552, "dur": 13.249, + "args": { + "External id": 293645,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367739495.778, "dur": 50.568, + "args": { + "External id": 293646,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367739552.401, "dur": 37.487, + "args": { + "External id": 293647,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367739599.239, "dur": 54.659, + "args": { + "External id": 293648,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367739663.465, "dur": 36.154, + "args": { + "External id": 293649,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367739705.848, "dur": 21.613, + "args": { + "External id": 293650,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367739736.866, "dur": 32.970, + "args": { + "External id": 293651,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5333367739791.898, "dur": 26.622, + "args": { + "External id": 293652,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5333367739840.379, "dur": 24.183, + "args": { + "External id": 293653,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333367739882.445, "dur": 17.345, + "args": { + "External id": 293654,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333367739916.189, "dur": 13.110, + "args": { + "External id": 293655,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5333367739947.472, "dur": 17.031, + "args": { + "External id": 293656,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367740040.273, "dur": 15.494, + "args": { + "External id": 293657,"Record function id": 0, "Ev Idx": 280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367740043.843, "dur": 10.708, + "args": { + "External id": 293658,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367740047.840, "dur": 5.960, + "args": { + "External id": 293659,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367740049.458, "dur": 4.229, + "args": { + "External id": 293660,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367740059.831, "dur": 4.834, + "args": { + "External id": 293661,"Record function id": 0, "Ev Idx": 284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367740061.481, "dur": 2.706, + "args": { + "External id": 293662,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367740062.365, "dur": 1.344, + "args": { + "External id": 293663,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367740062.879, "dur": 0.757, + "args": { + "External id": 293664,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367740067.953, "dur": 4.903, + "args": { + "External id": 293665,"Record function id": 0, "Ev Idx": 288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367740069.736, "dur": 2.696, + "args": { + "External id": 293666,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367740070.404, "dur": 1.500, + "args": { + "External id": 293667,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367740071.067, "dur": 0.723, + "args": { + "External id": 293668,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367740076.029, "dur": 6.278, + "args": { + "External id": 293669,"Record function id": 0, "Ev Idx": 292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367740077.543, "dur": 4.323, + "args": { + "External id": 293670,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367740078.283, "dur": 3.080, + "args": { + "External id": 293671,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367740078.729, "dur": 2.531, + "args": { + "External id": 293672,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367740085.349, "dur": 4.234, + "args": { + "External id": 293673,"Record function id": 0, "Ev Idx": 296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367740086.997, "dur": 2.153, + "args": { + "External id": 293674,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367740087.664, "dur": 1.069, + "args": { + "External id": 293675,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367740087.937, "dur": 0.726, + "args": { + "External id": 293676,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367740092.588, "dur": 4.547, + "args": { + "External id": 293677,"Record function id": 0, "Ev Idx": 300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367740094.456, "dur": 2.254, + "args": { + "External id": 293678,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367740095.167, "dur": 1.076, + "args": { + "External id": 293679,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367740095.530, "dur": 0.639, + "args": { + "External id": 293680,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367740100.324, "dur": 6.569, + "args": { + "External id": 293681,"Record function id": 0, "Ev Idx": 304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367740101.891, "dur": 4.593, + "args": { + "External id": 293682,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367740102.506, "dur": 3.487, + "args": { + "External id": 293683,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367740105.016, "dur": 0.891, + "args": { + "External id": 293684,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367740109.899, "dur": 3.789, + "args": { + "External id": 293685,"Record function id": 0, "Ev Idx": 308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367740111.032, "dur": 2.241, + "args": { + "External id": 293686,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367740111.655, "dur": 1.213, + "args": { + "External id": 293687,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367740112.115, "dur": 0.662, + "args": { + "External id": 293688,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367740116.671, "dur": 3.464, + "args": { + "External id": 293689,"Record function id": 0, "Ev Idx": 312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367740117.841, "dur": 1.886, + "args": { + "External id": 293690,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367740118.319, "dur": 1.020, + "args": { + "External id": 293691,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367740118.584, "dur": 0.685, + "args": { + "External id": 293692,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333367740124.861, "dur": 36297.499, + "args": { + "External id": 293693,"Record function id": 0, "Sequence number": 1209221, "Fwd thread id": 1, "Ev Idx": 316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333367740126.031, "dur": 36287.582, + "args": { + "External id": 293694,"Sequence number": 1209221, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 317 + } + }, + { + "ph": "f", "id": 11, "pid": 2070552, "tid": 2107648, "ts": 5333367740126.031, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.30)", "pid": 2070552, "tid": 2107648, + "ts": 5333367740155.990, "dur": 67.506, + "args": { + "External id": 293695,"Record function id": 0, "Ev Idx": 318 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.30)", "pid": 2070552, "tid": 2107648, + "ts": 5333367740235.594, "dur": 73.814, + "args": { + "External id": 293696,"Record function id": 0, "Ev Idx": 319 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.30)", "pid": 2070552, "tid": 2107648, + "ts": 5333367740317.482, "dur": 36087.994, + "args": { + "External id": 293697,"Record function id": 0, "Ev Idx": 320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367740411.842, "dur": 7.408, + "args": { + "External id": 293698,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367740431.077, "dur": 5.018, + "args": { + "External id": 293699,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333367740450.486, "dur": 35046.900, + "args": { + "External id": 293700,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333367740467.691, "dur": 35018.411, + "args": { + "External id": 293701,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367740515.148, "dur": 19.784, + "args": { + "External id": 293702,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333367740542.109, "dur": 34904.574, + "args": { + "External id": 293703,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333367740546.473, "dur": 34899.310, + "args": { + "External id": 293704,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367740550.556, "dur": 5.013, + "args": { + "External id": 293705,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333367740557.093, "dur": 34884.510, + "args": { + "External id": 293706,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367775598.517, "dur": 10.715, + "args": { + "External id": 293707,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367775601.862, "dur": 7.047, + "args": { + "External id": 293708,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333367775667.158, "dur": 375.829, + "args": { + "External id": 293709,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333367775699.730, "dur": 338.524, + "args": { + "External id": 293710,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 333, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5333367775712.790, "dur": 320.119, + "args": { + "External id": 293711,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333367776065.420, "dur": 2.448, + "args": { + "External id": 293712,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 335, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367776129.381, "dur": 6.956, + "args": { + "External id": 293713,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367776212.989, "dur": 3.028, + "args": { + "External id": 293714,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367776242.717, "dur": 1.644, + "args": { + "External id": 293715,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367776261.092, "dur": 0.948, + "args": { + "External id": 293716,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367776275.830, "dur": 0.779, + "args": { + "External id": 293717,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367776289.163, "dur": 1.151, + "args": { + "External id": 293718,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367776305.002, "dur": 1.053, + "args": { + "External id": 293719,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367776321.479, "dur": 3.129, + "args": { + "External id": 293720,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367776336.217, "dur": 0.857, + "args": { + "External id": 293721,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333367776439.279, "dur": 3006.037, + "args": { + "External id": 293722,"Record function id": 0, "Ev Idx": 345 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.29)", "pid": 2070552, "tid": 2107648, + "ts": 5333367776461.076, "dur": 1145.270, + "args": { + "External id": 293723,"Record function id": 0, "Ev Idx": 346 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.29)", "pid": 2070552, "tid": 2107648, + "ts": 5333367776477.346, "dur": 375.222, + "args": { + "External id": 293724,"Record function id": 0, "Ev Idx": 347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367776559.290, "dur": 4.298, + "args": { + "External id": 293725,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367776567.260, "dur": 1.158, + "args": { + "External id": 293726,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367776570.454, "dur": 1.016, + "args": { + "External id": 293727,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367776575.210, "dur": 2.852, + "args": { + "External id": 293728,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367776579.555, "dur": 1.040, + "args": { + "External id": 293729,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367776582.044, "dur": 1.057, + "args": { + "External id": 293730,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367776584.644, "dur": 2.610, + "args": { + "External id": 293731,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367776590.744, "dur": 1.142, + "args": { + "External id": 293732,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367776593.445, "dur": 0.835, + "args": { + "External id": 293733,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367776595.922, "dur": 1.120, + "args": { + "External id": 293734,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333367776615.244, "dur": 204.587, + "args": { + "External id": 293735,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333367776668.884, "dur": 145.906, + "args": { + "External id": 293736,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367776689.711, "dur": 15.569, + "args": { + "External id": 293737,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333367776709.723, "dur": 75.692, + "args": { + "External id": 293738,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333367776712.706, "dur": 72.397, + "args": { + "External id": 293739,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367776716.566, "dur": 10.447, + "args": { + "External id": 293740,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333367776728.568, "dur": 55.935, + "args": { + "External id": 293741,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 364 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.28", "pid": 2070552, "tid": 2107648, + "ts": 5333367776947.319, "dur": 651.338, + "args": { + "External id": 293742,"Record function id": 0, "Ev Idx": 365 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.28)", "pid": 2070552, "tid": 2107648, + "ts": 5333367776965.184, "dur": 619.767, + "args": { + "External id": 293743,"Record function id": 0, "Ev Idx": 366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367777028.915, "dur": 4.580, + "args": { + "External id": 293744,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333367777049.504, "dur": 39.602, + "args": { + "External id": 293745,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367777054.499, "dur": 1.718, + "args": { + "External id": 293746,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367777061.214, "dur": 0.463, + "args": { + "External id": 293747,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367777063.325, "dur": 0.509, + "args": { + "External id": 293748,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367777065.279, "dur": 1.900, + "args": { + "External id": 293749,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367777068.982, "dur": 0.300, + "args": { + "External id": 293750,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367777070.987, "dur": 0.549, + "args": { + "External id": 293751,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367777075.419, "dur": 2.487, + "args": { + "External id": 293752,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367777079.275, "dur": 0.596, + "args": { + "External id": 293753,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367777081.655, "dur": 0.555, + "args": { + "External id": 293754,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333367777099.354, "dur": 32.624, + "args": { + "External id": 293755,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5333367777164.144, "dur": 174.961, + "args": { + "External id": 293756,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367777197.747, "dur": 5.693, + "args": { + "External id": 293757,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5333367777210.003, "dur": 15.123, + "args": { + "External id": 293758,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5333367777215.058, "dur": 9.500, + "args": { + "External id": 293759,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367777219.769, "dur": 2.687, + "args": { + "External id": 293760,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333367777234.992, "dur": 48.724, + "args": { + "External id": 293761,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367777238.453, "dur": 0.874, + "args": { + "External id": 293762,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367777241.537, "dur": 1.119, + "args": { + "External id": 293763,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367777245.001, "dur": 0.460, + "args": { + "External id": 293764,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367777253.466, "dur": 2.373, + "args": { + "External id": 293765,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367777258.229, "dur": 0.626, + "args": { + "External id": 293766,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367777261.065, "dur": 2.293, + "args": { + "External id": 293767,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367777265.442, "dur": 0.568, + "args": { + "External id": 293768,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367777270.916, "dur": 0.738, + "args": { + "External id": 293769,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367777276.062, "dur": 0.678, + "args": { + "External id": 293770,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333367777299.274, "dur": 30.873, + "args": { + "External id": 293771,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333367777390.015, "dur": 124.301, + "args": { + "External id": 293772,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333367777422.441, "dur": 88.452, + "args": { + "External id": 293773,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 396, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5333367777432.524, "dur": 73.675, + "args": { + "External id": 293774,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333367777531.741, "dur": 1.758, + "args": { + "External id": 293775,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 398, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333367777614.485, "dur": 1801.430, + "args": { + "External id": 293776,"Sequence number": 1209220, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 399 + } + }, + { + "ph": "f", "id": 12, "pid": 2070552, "tid": 2107648, "ts": 5333367777614.485, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367777775.936, "dur": 114.267, + "args": { + "External id": 293777,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5333367777936.026, "dur": 40.169, + "args": { + "External id": 293778,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5333367777993.130, "dur": 48.808, + "args": { + "External id": 293779,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367778052.523, "dur": 32.081, + "args": { + "External id": 293780,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367778092.782, "dur": 45.891, + "args": { + "External id": 293781,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367778144.678, "dur": 49.457, + "args": { + "External id": 293782,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367778208.908, "dur": 58.206, + "args": { + "External id": 293783,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5333367778297.354, "dur": 27.732, + "args": { + "External id": 293784,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5333367778345.201, "dur": 27.815, + "args": { + "External id": 293785,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333367778397.782, "dur": 20.760, + "args": { + "External id": 293786,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333367778434.503, "dur": 14.912, + "args": { + "External id": 293787,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367778457.722, "dur": 30.998, + "args": { + "External id": 293788,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367778491.683, "dur": 33.634, + "args": { + "External id": 293789,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5333367778561.290, "dur": 221.240, + "args": { + "External id": 293790,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367778682.974, "dur": 8.540, + "args": { + "External id": 293791,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367778693.580, "dur": 2.845, + "args": { + "External id": 293792,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333367778836.619, "dur": 32.847, + "args": { + "External id": 293793,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333367778881.549, "dur": 14.875, + "args": { + "External id": 293794,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367778909.024, "dur": 45.797, + "args": { + "External id": 293795,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367778963.322, "dur": 37.027, + "args": { + "External id": 293796,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367779008.647, "dur": 24.181, + "args": { + "External id": 293797,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367779037.261, "dur": 29.860, + "args": { + "External id": 293798,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367779072.437, "dur": 22.486, + "args": { + "External id": 293799,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367779101.588, "dur": 29.193, + "args": { + "External id": 293800,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5333367779155.450, "dur": 43.226, + "args": { + "External id": 293801,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5333367779232.153, "dur": 40.139, + "args": { + "External id": 293802,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333367779290.916, "dur": 19.042, + "args": { + "External id": 293803,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333367779334.085, "dur": 15.242, + "args": { + "External id": 293804,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5333367779364.615, "dur": 16.690, + "args": { + "External id": 293805,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367779471.258, "dur": 16.426, + "args": { + "External id": 293806,"Record function id": 0, "Ev Idx": 429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367779474.750, "dur": 11.825, + "args": { + "External id": 293807,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367779479.631, "dur": 6.061, + "args": { + "External id": 293808,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367779481.296, "dur": 4.307, + "args": { + "External id": 293809,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367779491.886, "dur": 4.937, + "args": { + "External id": 293810,"Record function id": 0, "Ev Idx": 433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367779493.320, "dur": 3.066, + "args": { + "External id": 293811,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367779494.193, "dur": 1.666, + "args": { + "External id": 293812,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367779494.957, "dur": 0.779, + "args": { + "External id": 293813,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367779500.256, "dur": 6.478, + "args": { + "External id": 293814,"Record function id": 0, "Ev Idx": 437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367779501.668, "dur": 4.644, + "args": { + "External id": 293815,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367779502.268, "dur": 3.581, + "args": { + "External id": 293816,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367779502.889, "dur": 2.870, + "args": { + "External id": 293817,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367779509.951, "dur": 4.297, + "args": { + "External id": 293818,"Record function id": 0, "Ev Idx": 441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367779511.452, "dur": 2.393, + "args": { + "External id": 293819,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367779512.000, "dur": 1.282, + "args": { + "External id": 293820,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367779512.493, "dur": 0.697, + "args": { + "External id": 293821,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367779517.263, "dur": 3.919, + "args": { + "External id": 293822,"Record function id": 0, "Ev Idx": 445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367779518.566, "dur": 2.186, + "args": { + "External id": 293823,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367779519.226, "dur": 1.078, + "args": { + "External id": 293824,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367779519.543, "dur": 0.687, + "args": { + "External id": 293825,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367779524.205, "dur": 3.977, + "args": { + "External id": 293826,"Record function id": 0, "Ev Idx": 449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367779525.626, "dur": 2.151, + "args": { + "External id": 293827,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367779526.112, "dur": 1.227, + "args": { + "External id": 293828,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367779526.594, "dur": 0.670, + "args": { + "External id": 293829,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367779531.721, "dur": 4.875, + "args": { + "External id": 293830,"Record function id": 0, "Ev Idx": 453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367779533.089, "dur": 3.096, + "args": { + "External id": 293831,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367779534.002, "dur": 1.739, + "args": { + "External id": 293832,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367779534.950, "dur": 0.720, + "args": { + "External id": 293833,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367779539.785, "dur": 3.603, + "args": { + "External id": 293834,"Record function id": 0, "Ev Idx": 457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367779540.794, "dur": 2.199, + "args": { + "External id": 293835,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367779541.530, "dur": 1.042, + "args": { + "External id": 293836,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367779541.816, "dur": 0.681, + "args": { + "External id": 293837,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367779547.746, "dur": 5.647, + "args": { + "External id": 293838,"Record function id": 0, "Ev Idx": 461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367779548.885, "dur": 4.117, + "args": { + "External id": 293839,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367779549.362, "dur": 3.090, + "args": { + "External id": 293840,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367779551.784, "dur": 0.594, + "args": { + "External id": 293841,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333367779557.132, "dur": 35376.150, + "args": { + "External id": 293842,"Record function id": 0, "Sequence number": 1209219, "Fwd thread id": 1, "Ev Idx": 465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333367779558.458, "dur": 35366.027, + "args": { + "External id": 293843,"Sequence number": 1209219, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 466 + } + }, + { + "ph": "f", "id": 13, "pid": 2070552, "tid": 2107648, "ts": 5333367779558.458, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.29)", "pid": 2070552, "tid": 2107648, + "ts": 5333367779587.514, "dur": 79.474, + "args": { + "External id": 293844,"Record function id": 0, "Ev Idx": 467 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.29)", "pid": 2070552, "tid": 2107648, + "ts": 5333367779676.811, "dur": 68.013, + "args": { + "External id": 293845,"Record function id": 0, "Ev Idx": 468 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.29)", "pid": 2070552, "tid": 2107648, + "ts": 5333367779750.456, "dur": 35165.981, + "args": { + "External id": 293846,"Record function id": 0, "Ev Idx": 469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367779844.723, "dur": 7.436, + "args": { + "External id": 293847,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367779865.790, "dur": 6.975, + "args": { + "External id": 293848,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333367779891.251, "dur": 34109.188, + "args": { + "External id": 293849,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333367779904.754, "dur": 34083.650, + "args": { + "External id": 293850,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367779947.585, "dur": 15.048, + "args": { + "External id": 293851,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333367779969.051, "dur": 33974.162, + "args": { + "External id": 293852,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333367779971.593, "dur": 33970.721, + "args": { + "External id": 293853,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367779977.275, "dur": 5.078, + "args": { + "External id": 293854,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333367779983.870, "dur": 33953.485, + "args": { + "External id": 293855,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367814108.212, "dur": 12.128, + "args": { + "External id": 293856,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367814111.787, "dur": 7.927, + "args": { + "External id": 293857,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333367814152.297, "dur": 392.115, + "args": { + "External id": 293858,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333367814200.161, "dur": 338.100, + "args": { + "External id": 293859,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 482, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5333367814218.340, "dur": 314.047, + "args": { + "External id": 293860,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333367814569.661, "dur": 2.152, + "args": { + "External id": 293861,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 484, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367814686.749, "dur": 7.602, + "args": { + "External id": 293862,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367814743.500, "dur": 1.502, + "args": { + "External id": 293863,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367814762.649, "dur": 3.519, + "args": { + "External id": 293864,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367814780.147, "dur": 1.076, + "args": { + "External id": 293865,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367814795.703, "dur": 1.271, + "args": { + "External id": 293866,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367814809.047, "dur": 1.187, + "args": { + "External id": 293867,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367814821.750, "dur": 2.924, + "args": { + "External id": 293868,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367814837.517, "dur": 2.779, + "args": { + "External id": 293869,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367814851.400, "dur": 0.978, + "args": { + "External id": 293870,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333367814950.312, "dur": 3025.592, + "args": { + "External id": 293871,"Record function id": 0, "Ev Idx": 494 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.28)", "pid": 2070552, "tid": 2107648, + "ts": 5333367814972.837, "dur": 1160.324, + "args": { + "External id": 293872,"Record function id": 0, "Ev Idx": 495 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.28)", "pid": 2070552, "tid": 2107648, + "ts": 5333367814989.815, "dur": 396.346, + "args": { + "External id": 293873,"Record function id": 0, "Ev Idx": 496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367815073.380, "dur": 4.465, + "args": { + "External id": 293874,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367815081.496, "dur": 1.079, + "args": { + "External id": 293875,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367815084.282, "dur": 2.838, + "args": { + "External id": 293876,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367815089.001, "dur": 0.875, + "args": { + "External id": 293877,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367815091.434, "dur": 0.716, + "args": { + "External id": 293878,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367815095.450, "dur": 1.150, + "args": { + "External id": 293879,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367815098.359, "dur": 2.994, + "args": { + "External id": 293880,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367815102.600, "dur": 0.958, + "args": { + "External id": 293881,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367815105.232, "dur": 1.021, + "args": { + "External id": 293882,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367815109.923, "dur": 1.003, + "args": { + "External id": 293883,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333367815129.084, "dur": 217.632, + "args": { + "External id": 293884,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333367815145.470, "dur": 194.553, + "args": { + "External id": 293885,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367815163.084, "dur": 35.072, + "args": { + "External id": 293886,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333367815204.376, "dur": 106.276, + "args": { + "External id": 293887,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333367815207.539, "dur": 102.474, + "args": { + "External id": 293888,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367815238.463, "dur": 10.042, + "args": { + "External id": 293889,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333367815250.908, "dur": 58.446, + "args": { + "External id": 293890,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 513 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.27", "pid": 2070552, "tid": 2107648, + "ts": 5333367815483.642, "dur": 641.372, + "args": { + "External id": 293891,"Record function id": 0, "Ev Idx": 514 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.27)", "pid": 2070552, "tid": 2107648, + "ts": 5333367815504.821, "dur": 607.028, + "args": { + "External id": 293892,"Record function id": 0, "Ev Idx": 515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367815570.201, "dur": 4.967, + "args": { + "External id": 293893,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333367815591.841, "dur": 77.999, + "args": { + "External id": 293894,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367815597.392, "dur": 2.043, + "args": { + "External id": 293895,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367815601.787, "dur": 2.757, + "args": { + "External id": 293896,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367815606.555, "dur": 0.401, + "args": { + "External id": 293897,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367815608.688, "dur": 0.366, + "args": { + "External id": 293898,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367815612.473, "dur": 0.341, + "args": { + "External id": 293899,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367815614.211, "dur": 2.517, + "args": { + "External id": 293900,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367815618.151, "dur": 36.257, + "args": { + "External id": 293901,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367815659.629, "dur": 0.467, + "args": { + "External id": 293902,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367815661.529, "dur": 0.749, + "args": { + "External id": 293903,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333367815681.926, "dur": 36.898, + "args": { + "External id": 293904,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5333367815753.903, "dur": 117.478, + "args": { + "External id": 293905,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367815766.822, "dur": 4.425, + "args": { + "External id": 293906,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5333367815776.025, "dur": 12.638, + "args": { + "External id": 293907,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5333367815782.633, "dur": 5.584, + "args": { + "External id": 293908,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367815786.497, "dur": 0.591, + "args": { + "External id": 293909,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333367815796.021, "dur": 36.853, + "args": { + "External id": 293910,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367815798.655, "dur": 0.589, + "args": { + "External id": 293911,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367815802.823, "dur": 0.471, + "args": { + "External id": 293912,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367815805.132, "dur": 5.328, + "args": { + "External id": 293913,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367815812.030, "dur": 2.400, + "args": { + "External id": 293914,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367815815.934, "dur": 0.372, + "args": { + "External id": 293915,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367815817.821, "dur": 0.551, + "args": { + "External id": 293916,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367815822.086, "dur": 0.274, + "args": { + "External id": 293917,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367815823.912, "dur": 0.344, + "args": { + "External id": 293918,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367815825.566, "dur": 0.471, + "args": { + "External id": 293919,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333367815844.697, "dur": 19.106, + "args": { + "External id": 293920,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333367815917.565, "dur": 124.734, + "args": { + "External id": 293921,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333367815947.037, "dur": 91.988, + "args": { + "External id": 293922,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 545, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5333367815956.893, "dur": 77.945, + "args": { + "External id": 293923,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333367816060.663, "dur": 1.860, + "args": { + "External id": 293924,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 547, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333367816141.759, "dur": 1808.737, + "args": { + "External id": 293925,"Sequence number": 1209218, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 548 + } + }, + { + "ph": "f", "id": 14, "pid": 2070552, "tid": 2107648, "ts": 5333367816141.759, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367816292.053, "dur": 121.551, + "args": { + "External id": 293926,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5333367816461.010, "dur": 38.831, + "args": { + "External id": 293927,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5333367816533.821, "dur": 53.329, + "args": { + "External id": 293928,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367816601.346, "dur": 71.956, + "args": { + "External id": 293929,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367816684.430, "dur": 49.536, + "args": { + "External id": 293930,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367816740.750, "dur": 28.724, + "args": { + "External id": 293931,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367816776.207, "dur": 42.154, + "args": { + "External id": 293932,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5333367816852.254, "dur": 24.475, + "args": { + "External id": 293933,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5333367816896.988, "dur": 27.733, + "args": { + "External id": 293934,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333367816946.909, "dur": 20.859, + "args": { + "External id": 293935,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333367816981.543, "dur": 15.028, + "args": { + "External id": 293936,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367817004.316, "dur": 30.365, + "args": { + "External id": 293937,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367817037.622, "dur": 31.617, + "args": { + "External id": 293938,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5333367817102.287, "dur": 218.293, + "args": { + "External id": 293939,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367817207.021, "dur": 8.231, + "args": { + "External id": 293940,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367817217.794, "dur": 3.354, + "args": { + "External id": 293941,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333367817378.706, "dur": 31.515, + "args": { + "External id": 293942,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333367817424.205, "dur": 15.024, + "args": { + "External id": 293943,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367817449.718, "dur": 53.610, + "args": { + "External id": 293944,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367817509.135, "dur": 36.658, + "args": { + "External id": 293945,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367817557.342, "dur": 21.053, + "args": { + "External id": 293946,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367817582.656, "dur": 30.544, + "args": { + "External id": 293947,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367817654.559, "dur": 26.277, + "args": { + "External id": 293948,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367817691.872, "dur": 30.224, + "args": { + "External id": 293949,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5333367817746.878, "dur": 24.554, + "args": { + "External id": 293950,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5333367817796.276, "dur": 25.610, + "args": { + "External id": 293951,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333367817841.290, "dur": 18.220, + "args": { + "External id": 293952,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333367817876.437, "dur": 13.735, + "args": { + "External id": 293953,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5333367817904.781, "dur": 15.222, + "args": { + "External id": 293954,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367818001.955, "dur": 15.513, + "args": { + "External id": 293955,"Record function id": 0, "Ev Idx": 578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367818005.400, "dur": 11.087, + "args": { + "External id": 293956,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367818009.704, "dur": 5.871, + "args": { + "External id": 293957,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367818011.331, "dur": 4.150, + "args": { + "External id": 293958,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367818021.208, "dur": 7.774, + "args": { + "External id": 293959,"Record function id": 0, "Ev Idx": 582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367818022.658, "dur": 5.825, + "args": { + "External id": 293960,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367818023.965, "dur": 3.790, + "args": { + "External id": 293961,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367818024.558, "dur": 3.062, + "args": { + "External id": 293962,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367818032.227, "dur": 5.762, + "args": { + "External id": 293963,"Record function id": 0, "Ev Idx": 586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367818033.846, "dur": 3.739, + "args": { + "External id": 293964,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367818034.884, "dur": 2.023, + "args": { + "External id": 293965,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367818035.824, "dur": 0.973, + "args": { + "External id": 293966,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367818041.276, "dur": 5.148, + "args": { + "External id": 293967,"Record function id": 0, "Ev Idx": 590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367818043.380, "dur": 2.622, + "args": { + "External id": 293968,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367818044.272, "dur": 1.286, + "args": { + "External id": 293969,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367818044.583, "dur": 0.847, + "args": { + "External id": 293970,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367818049.563, "dur": 7.663, + "args": { + "External id": 293971,"Record function id": 0, "Ev Idx": 594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367818050.958, "dur": 5.855, + "args": { + "External id": 293972,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367818051.726, "dur": 1.139, + "args": { + "External id": 293973,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367818052.143, "dur": 0.650, + "args": { + "External id": 293974,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367818060.284, "dur": 3.581, + "args": { + "External id": 293975,"Record function id": 0, "Ev Idx": 598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367818061.421, "dur": 2.050, + "args": { + "External id": 293976,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367818061.868, "dur": 1.177, + "args": { + "External id": 293977,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367818062.338, "dur": 0.631, + "args": { + "External id": 293978,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367818067.042, "dur": 4.027, + "args": { + "External id": 293979,"Record function id": 0, "Ev Idx": 602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367818068.252, "dur": 2.412, + "args": { + "External id": 293980,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367818068.924, "dur": 1.327, + "args": { + "External id": 293981,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367818069.514, "dur": 0.672, + "args": { + "External id": 293982,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367818074.166, "dur": 5.671, + "args": { + "External id": 293983,"Record function id": 0, "Ev Idx": 606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367818075.391, "dur": 4.046, + "args": { + "External id": 293984,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367818075.850, "dur": 3.011, + "args": { + "External id": 293985,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367818078.106, "dur": 0.658, + "args": { + "External id": 293986,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367818082.964, "dur": 5.335, + "args": { + "External id": 293987,"Record function id": 0, "Ev Idx": 610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367818084.113, "dur": 3.784, + "args": { + "External id": 293988,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367818084.629, "dur": 2.809, + "args": { + "External id": 293989,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367818084.902, "dur": 2.466, + "args": { + "External id": 293990,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333367818093.591, "dur": 36503.672, + "args": { + "External id": 293991,"Record function id": 0, "Sequence number": 1209217, "Fwd thread id": 1, "Ev Idx": 614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333367818095.172, "dur": 36492.726, + "args": { + "External id": 293992,"Sequence number": 1209217, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 615 + } + }, + { + "ph": "f", "id": 15, "pid": 2070552, "tid": 2107648, "ts": 5333367818095.172, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.28)", "pid": 2070552, "tid": 2107648, + "ts": 5333367818126.937, "dur": 64.746, + "args": { + "External id": 293993,"Record function id": 0, "Ev Idx": 616 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.28)", "pid": 2070552, "tid": 2107648, + "ts": 5333367818207.627, "dur": 92.580, + "args": { + "External id": 293994,"Record function id": 0, "Ev Idx": 617 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.28)", "pid": 2070552, "tid": 2107648, + "ts": 5333367818306.273, "dur": 36274.053, + "args": { + "External id": 293995,"Record function id": 0, "Ev Idx": 618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367818401.287, "dur": 8.123, + "args": { + "External id": 293996,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367818425.573, "dur": 5.241, + "args": { + "External id": 293997,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333367818448.747, "dur": 35223.874, + "args": { + "External id": 293998,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333367818462.360, "dur": 35198.963, + "args": { + "External id": 293999,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367818505.927, "dur": 14.470, + "args": { + "External id": 294000,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333367818526.567, "dur": 35070.324, + "args": { + "External id": 294001,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333367818530.707, "dur": 35065.270, + "args": { + "External id": 294002,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367818535.055, "dur": 6.765, + "args": { + "External id": 294003,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333367818543.507, "dur": 35048.027, + "args": { + "External id": 294004,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367853775.888, "dur": 11.460, + "args": { + "External id": 294005,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367853779.388, "dur": 7.609, + "args": { + "External id": 294006,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333367853817.925, "dur": 433.431, + "args": { + "External id": 294007,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333367853850.413, "dur": 394.320, + "args": { + "External id": 294008,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 631, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5333367853863.305, "dur": 373.746, + "args": { + "External id": 294009,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333367854279.954, "dur": 2.560, + "args": { + "External id": 294010,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 633, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367854350.502, "dur": 6.977, + "args": { + "External id": 294011,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367854407.210, "dur": 3.549, + "args": { + "External id": 294012,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367854428.091, "dur": 1.439, + "args": { + "External id": 294013,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367854442.209, "dur": 1.369, + "args": { + "External id": 294014,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367854457.686, "dur": 1.449, + "args": { + "External id": 294015,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367854470.492, "dur": 3.050, + "args": { + "External id": 294016,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367854484.938, "dur": 1.150, + "args": { + "External id": 294017,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367854499.935, "dur": 2.805, + "args": { + "External id": 294018,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367854514.728, "dur": 1.064, + "args": { + "External id": 294019,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333367854612.858, "dur": 2965.799, + "args": { + "External id": 294020,"Record function id": 0, "Ev Idx": 643 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.27)", "pid": 2070552, "tid": 2107648, + "ts": 5333367854674.236, "dur": 1128.843, + "args": { + "External id": 294021,"Record function id": 0, "Ev Idx": 644 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.27)", "pid": 2070552, "tid": 2107648, + "ts": 5333367854689.745, "dur": 337.701, + "args": { + "External id": 294022,"Record function id": 0, "Ev Idx": 645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367854777.533, "dur": 6.698, + "args": { + "External id": 294023,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367854788.021, "dur": 1.121, + "args": { + "External id": 294024,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367854790.930, "dur": 1.108, + "args": { + "External id": 294025,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367854794.022, "dur": 1.203, + "args": { + "External id": 294026,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367854796.978, "dur": 0.958, + "args": { + "External id": 294027,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367854801.384, "dur": 0.789, + "args": { + "External id": 294028,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367854803.865, "dur": 2.953, + "args": { + "External id": 294029,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367854808.142, "dur": 0.802, + "args": { + "External id": 294030,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367854810.353, "dur": 2.932, + "args": { + "External id": 294031,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367854816.820, "dur": 1.157, + "args": { + "External id": 294032,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333367854837.267, "dur": 159.507, + "args": { + "External id": 294033,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333367854853.315, "dur": 139.155, + "args": { + "External id": 294034,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367854873.492, "dur": 13.019, + "args": { + "External id": 294035,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333367854890.578, "dur": 73.537, + "args": { + "External id": 294036,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333367854893.134, "dur": 70.685, + "args": { + "External id": 294037,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367854898.374, "dur": 8.372, + "args": { + "External id": 294038,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333367854908.460, "dur": 54.816, + "args": { + "External id": 294039,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 662 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.26", "pid": 2070552, "tid": 2107648, + "ts": 5333367855116.959, "dur": 678.125, + "args": { + "External id": 294040,"Record function id": 0, "Ev Idx": 663 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.26)", "pid": 2070552, "tid": 2107648, + "ts": 5333367855134.152, "dur": 647.978, + "args": { + "External id": 294041,"Record function id": 0, "Ev Idx": 664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367855220.533, "dur": 8.448, + "args": { + "External id": 294042,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333367855249.887, "dur": 40.079, + "args": { + "External id": 294043,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367855255.506, "dur": 1.964, + "args": { + "External id": 294044,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367855260.131, "dur": 2.065, + "args": { + "External id": 294045,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367855264.070, "dur": 0.426, + "args": { + "External id": 294046,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367855266.101, "dur": 2.689, + "args": { + "External id": 294047,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367855272.213, "dur": 0.669, + "args": { + "External id": 294048,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367855274.536, "dur": 0.578, + "args": { + "External id": 294049,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367855276.887, "dur": 0.251, + "args": { + "External id": 294050,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367855279.915, "dur": 0.365, + "args": { + "External id": 294051,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367855282.002, "dur": 0.553, + "args": { + "External id": 294052,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333367855301.967, "dur": 35.949, + "args": { + "External id": 294053,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5333367855370.324, "dur": 110.073, + "args": { + "External id": 294054,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367855380.523, "dur": 4.088, + "args": { + "External id": 294055,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5333367855389.684, "dur": 12.706, + "args": { + "External id": 294056,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5333367855396.357, "dur": 5.634, + "args": { + "External id": 294057,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367855400.121, "dur": 0.693, + "args": { + "External id": 294058,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333367855409.089, "dur": 33.858, + "args": { + "External id": 294059,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367855411.539, "dur": 2.560, + "args": { + "External id": 294060,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367855416.735, "dur": 0.339, + "args": { + "External id": 294061,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367855419.072, "dur": 0.636, + "args": { + "External id": 294062,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367855421.210, "dur": 2.044, + "args": { + "External id": 294063,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367855425.037, "dur": 0.587, + "args": { + "External id": 294064,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367855426.885, "dur": 0.401, + "args": { + "External id": 294065,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367855430.274, "dur": 0.356, + "args": { + "External id": 294066,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367855432.399, "dur": 0.358, + "args": { + "External id": 294067,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367855434.178, "dur": 2.415, + "args": { + "External id": 294068,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333367855454.203, "dur": 18.685, + "args": { + "External id": 294069,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333367855532.230, "dur": 169.731, + "args": { + "External id": 294070,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333367855564.213, "dur": 133.597, + "args": { + "External id": 294071,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 694, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5333367855572.949, "dur": 119.978, + "args": { + "External id": 294072,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333367855721.885, "dur": 1.842, + "args": { + "External id": 294073,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 696, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333367855810.793, "dur": 1740.620, + "args": { + "External id": 294074,"Sequence number": 1209216, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 697 + } + }, + { + "ph": "f", "id": 16, "pid": 2070552, "tid": 2107648, "ts": 5333367855810.793, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367855926.509, "dur": 107.380, + "args": { + "External id": 294075,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5333367856076.882, "dur": 43.097, + "args": { + "External id": 294076,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5333367856138.435, "dur": 79.984, + "args": { + "External id": 294077,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367856236.845, "dur": 39.013, + "args": { + "External id": 294078,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367856282.864, "dur": 45.156, + "args": { + "External id": 294079,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367856334.890, "dur": 27.199, + "args": { + "External id": 294080,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367856372.770, "dur": 42.979, + "args": { + "External id": 294081,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5333367856443.876, "dur": 23.786, + "args": { + "External id": 294082,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5333367856488.356, "dur": 25.711, + "args": { + "External id": 294083,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333367856537.613, "dur": 19.534, + "args": { + "External id": 294084,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333367856573.153, "dur": 14.650, + "args": { + "External id": 294085,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367856607.105, "dur": 67.785, + "args": { + "External id": 294086,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367856680.670, "dur": 38.117, + "args": { + "External id": 294087,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5333367856751.055, "dur": 184.045, + "args": { + "External id": 294088,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367856838.206, "dur": 7.776, + "args": { + "External id": 294089,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367856847.962, "dur": 2.566, + "args": { + "External id": 294090,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333367856980.459, "dur": 31.652, + "args": { + "External id": 294091,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333367857026.925, "dur": 13.254, + "args": { + "External id": 294092,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367857051.505, "dur": 40.056, + "args": { + "External id": 294093,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367857099.371, "dur": 33.878, + "args": { + "External id": 294094,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367857139.701, "dur": 23.902, + "args": { + "External id": 294095,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367857188.940, "dur": 44.964, + "args": { + "External id": 294096,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367857242.992, "dur": 31.891, + "args": { + "External id": 294097,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367857285.812, "dur": 31.469, + "args": { + "External id": 294098,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5333367857344.981, "dur": 22.877, + "args": { + "External id": 294099,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5333367857386.627, "dur": 28.728, + "args": { + "External id": 294100,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333367857434.632, "dur": 16.118, + "args": { + "External id": 294101,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333367857466.125, "dur": 22.033, + "args": { + "External id": 294102,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5333367857502.009, "dur": 16.489, + "args": { + "External id": 294103,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367857601.659, "dur": 18.129, + "args": { + "External id": 294104,"Record function id": 0, "Ev Idx": 727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367857605.250, "dur": 13.529, + "args": { + "External id": 294105,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367857609.555, "dur": 8.256, + "args": { + "External id": 294106,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367857611.089, "dur": 6.622, + "args": { + "External id": 294107,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367857669.748, "dur": 8.958, + "args": { + "External id": 294108,"Record function id": 0, "Ev Idx": 731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367857672.515, "dur": 5.499, + "args": { + "External id": 294109,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367857673.947, "dur": 3.042, + "args": { + "External id": 294110,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367857674.958, "dur": 1.726, + "args": { + "External id": 294111,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367857683.019, "dur": 4.753, + "args": { + "External id": 294112,"Record function id": 0, "Ev Idx": 735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367857684.739, "dur": 2.610, + "args": { + "External id": 294113,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367857685.401, "dur": 1.475, + "args": { + "External id": 294114,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367857685.958, "dur": 0.832, + "args": { + "External id": 294115,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367857690.885, "dur": 4.998, + "args": { + "External id": 294116,"Record function id": 0, "Ev Idx": 739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367857692.317, "dur": 3.166, + "args": { + "External id": 294117,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367857693.070, "dur": 1.740, + "args": { + "External id": 294118,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367857693.785, "dur": 0.953, + "args": { + "External id": 294119,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367857698.879, "dur": 6.448, + "args": { + "External id": 294120,"Record function id": 0, "Ev Idx": 743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367857700.438, "dur": 4.472, + "args": { + "External id": 294121,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367857700.891, "dur": 3.167, + "args": { + "External id": 294122,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367857703.308, "dur": 0.628, + "args": { + "External id": 294123,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367857708.349, "dur": 3.660, + "args": { + "External id": 294124,"Record function id": 0, "Ev Idx": 747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367857709.623, "dur": 1.970, + "args": { + "External id": 294125,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367857710.107, "dur": 1.023, + "args": { + "External id": 294126,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367857710.382, "dur": 0.658, + "args": { + "External id": 294127,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367857715.196, "dur": 4.028, + "args": { + "External id": 294128,"Record function id": 0, "Ev Idx": 751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367857716.482, "dur": 2.336, + "args": { + "External id": 294129,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367857717.175, "dur": 0.938, + "args": { + "External id": 294130,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367857717.468, "dur": 0.567, + "args": { + "External id": 294131,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367857722.347, "dur": 6.923, + "args": { + "External id": 294132,"Record function id": 0, "Ev Idx": 755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367857723.633, "dur": 5.221, + "args": { + "External id": 294133,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367857724.153, "dur": 4.018, + "args": { + "External id": 294134,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367857724.814, "dur": 3.287, + "args": { + "External id": 294135,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367857732.304, "dur": 6.574, + "args": { + "External id": 294136,"Record function id": 0, "Ev Idx": 759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367857733.537, "dur": 4.880, + "args": { + "External id": 294137,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367857734.216, "dur": 3.446, + "args": { + "External id": 294138,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367857736.770, "dur": 0.791, + "args": { + "External id": 294139,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333367857744.764, "dur": 37948.363, + "args": { + "External id": 294140,"Record function id": 0, "Sequence number": 1209215, "Fwd thread id": 1, "Ev Idx": 763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333367857746.307, "dur": 37935.236, + "args": { + "External id": 294141,"Sequence number": 1209215, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 764 + } + }, + { + "ph": "f", "id": 17, "pid": 2070552, "tid": 2107648, "ts": 5333367857746.307, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.27)", "pid": 2070552, "tid": 2107648, + "ts": 5333367857778.421, "dur": 41.835, + "args": { + "External id": 294142,"Record function id": 0, "Ev Idx": 765 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.27)", "pid": 2070552, "tid": 2107648, + "ts": 5333367857828.300, "dur": 72.938, + "args": { + "External id": 294143,"Record function id": 0, "Ev Idx": 766 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.27)", "pid": 2070552, "tid": 2107648, + "ts": 5333367857907.009, "dur": 37764.742, + "args": { + "External id": 294144,"Record function id": 0, "Ev Idx": 767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367858000.629, "dur": 7.553, + "args": { + "External id": 294145,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367858018.546, "dur": 4.751, + "args": { + "External id": 294146,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333367858038.436, "dur": 36715.881, + "args": { + "External id": 294147,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333367858057.480, "dur": 36685.506, + "args": { + "External id": 294148,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367858097.697, "dur": 14.756, + "args": { + "External id": 294149,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333367858118.626, "dur": 36584.776, + "args": { + "External id": 294150,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333367858120.976, "dur": 36581.675, + "args": { + "External id": 294151,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367858126.718, "dur": 5.146, + "args": { + "External id": 294152,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333367858133.638, "dur": 36564.612, + "args": { + "External id": 294153,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367894855.659, "dur": 10.954, + "args": { + "External id": 294154,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367894859.321, "dur": 6.898, + "args": { + "External id": 294155,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333367894894.887, "dur": 410.604, + "args": { + "External id": 294156,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333367894931.137, "dur": 368.422, + "args": { + "External id": 294157,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 780, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5333367894943.891, "dur": 348.946, + "args": { + "External id": 294158,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333367895331.125, "dur": 2.509, + "args": { + "External id": 294159,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 782, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367895400.241, "dur": 7.251, + "args": { + "External id": 294160,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367895458.453, "dur": 1.205, + "args": { + "External id": 294161,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367895478.111, "dur": 1.211, + "args": { + "External id": 294162,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367895492.322, "dur": 0.894, + "args": { + "External id": 294163,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367895509.021, "dur": 0.795, + "args": { + "External id": 294164,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367895523.646, "dur": 0.635, + "args": { + "External id": 294165,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367895537.666, "dur": 1.397, + "args": { + "External id": 294166,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367895551.692, "dur": 3.126, + "args": { + "External id": 294167,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367895567.335, "dur": 1.132, + "args": { + "External id": 294168,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333367895710.043, "dur": 3003.854, + "args": { + "External id": 294169,"Record function id": 0, "Ev Idx": 792 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.26)", "pid": 2070552, "tid": 2107648, + "ts": 5333367895731.287, "dur": 1127.299, + "args": { + "External id": 294170,"Record function id": 0, "Ev Idx": 793 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.26)", "pid": 2070552, "tid": 2107648, + "ts": 5333367895746.464, "dur": 328.493, + "args": { + "External id": 294171,"Record function id": 0, "Ev Idx": 794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367895830.148, "dur": 4.981, + "args": { + "External id": 294172,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367895838.330, "dur": 1.694, + "args": { + "External id": 294173,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367895843.632, "dur": 1.016, + "args": { + "External id": 294174,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367895846.056, "dur": 1.424, + "args": { + "External id": 294175,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367895849.210, "dur": 1.485, + "args": { + "External id": 294176,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367895852.192, "dur": 0.948, + "args": { + "External id": 294177,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367895856.832, "dur": 3.073, + "args": { + "External id": 294178,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367895861.663, "dur": 2.371, + "args": { + "External id": 294179,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367895865.648, "dur": 1.106, + "args": { + "External id": 294180,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367895868.123, "dur": 1.174, + "args": { + "External id": 294181,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333367895889.714, "dur": 156.567, + "args": { + "External id": 294182,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333367895906.402, "dur": 135.284, + "args": { + "External id": 294183,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367895925.232, "dur": 13.608, + "args": { + "External id": 294184,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333367895942.479, "dur": 69.956, + "args": { + "External id": 294185,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333367895945.139, "dur": 66.936, + "args": { + "External id": 294186,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367895950.405, "dur": 4.640, + "args": { + "External id": 294187,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333367895956.755, "dur": 54.639, + "args": { + "External id": 294188,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 811 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.25", "pid": 2070552, "tid": 2107648, + "ts": 5333367896184.106, "dur": 666.648, + "args": { + "External id": 294189,"Record function id": 0, "Ev Idx": 812 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.25)", "pid": 2070552, "tid": 2107648, + "ts": 5333367896209.213, "dur": 629.106, + "args": { + "External id": 294190,"Record function id": 0, "Ev Idx": 813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367896284.715, "dur": 6.752, + "args": { + "External id": 294191,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333367896312.095, "dur": 36.118, + "args": { + "External id": 294192,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367896317.715, "dur": 1.667, + "args": { + "External id": 294193,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367896322.448, "dur": 1.784, + "args": { + "External id": 294194,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367896325.973, "dur": 2.277, + "args": { + "External id": 294195,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367896329.825, "dur": 0.575, + "args": { + "External id": 294196,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367896333.141, "dur": 0.333, + "args": { + "External id": 294197,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367896335.162, "dur": 0.553, + "args": { + "External id": 294198,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367896337.637, "dur": 0.442, + "args": { + "External id": 294199,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367896340.453, "dur": 0.352, + "args": { + "External id": 294200,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367896342.043, "dur": 0.338, + "args": { + "External id": 294201,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333367896361.993, "dur": 38.365, + "args": { + "External id": 294202,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5333367896431.491, "dur": 116.690, + "args": { + "External id": 294203,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367896442.976, "dur": 4.930, + "args": { + "External id": 294204,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5333367896452.909, "dur": 12.991, + "args": { + "External id": 294205,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5333367896457.086, "dur": 8.363, + "args": { + "External id": 294206,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367896461.006, "dur": 2.911, + "args": { + "External id": 294207,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333367896472.786, "dur": 29.709, + "args": { + "External id": 294208,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367896474.602, "dur": 0.333, + "args": { + "External id": 294209,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367896476.582, "dur": 1.258, + "args": { + "External id": 294210,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367896479.466, "dur": 0.314, + "args": { + "External id": 294211,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367896481.011, "dur": 0.380, + "args": { + "External id": 294212,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367896484.049, "dur": 0.464, + "args": { + "External id": 294213,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367896486.220, "dur": 0.365, + "args": { + "External id": 294214,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367896488.009, "dur": 0.521, + "args": { + "External id": 294215,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367896491.658, "dur": 2.415, + "args": { + "External id": 294216,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367896495.515, "dur": 0.357, + "args": { + "External id": 294217,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333367896515.687, "dur": 24.511, + "args": { + "External id": 294218,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333367896594.865, "dur": 166.129, + "args": { + "External id": 294219,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333367896664.712, "dur": 92.610, + "args": { + "External id": 294220,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 843, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5333367896675.693, "dur": 76.266, + "args": { + "External id": 294221,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333367896779.712, "dur": 1.885, + "args": { + "External id": 294222,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 845, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333367896866.165, "dur": 1820.149, + "args": { + "External id": 294223,"Sequence number": 1209214, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 846 + } + }, + { + "ph": "f", "id": 18, "pid": 2070552, "tid": 2107648, "ts": 5333367896866.165, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367896980.229, "dur": 109.835, + "args": { + "External id": 294224,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5333367897137.128, "dur": 59.527, + "args": { + "External id": 294225,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5333367897224.161, "dur": 71.261, + "args": { + "External id": 294226,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367897307.643, "dur": 36.154, + "args": { + "External id": 294227,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367897350.420, "dur": 45.323, + "args": { + "External id": 294228,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367897402.170, "dur": 27.338, + "args": { + "External id": 294229,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367897438.978, "dur": 40.817, + "args": { + "External id": 294230,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5333367897510.047, "dur": 26.946, + "args": { + "External id": 294231,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5333367897558.088, "dur": 30.567, + "args": { + "External id": 294232,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333367897610.928, "dur": 63.136, + "args": { + "External id": 294233,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333367897693.439, "dur": 19.143, + "args": { + "External id": 294234,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367897725.543, "dur": 35.039, + "args": { + "External id": 294235,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367897764.286, "dur": 34.243, + "args": { + "External id": 294236,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5333367897830.815, "dur": 182.296, + "args": { + "External id": 294237,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367897914.583, "dur": 5.896, + "args": { + "External id": 294238,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367897922.285, "dur": 4.173, + "args": { + "External id": 294239,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333367898049.934, "dur": 23.837, + "args": { + "External id": 294240,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333367898104.297, "dur": 23.752, + "args": { + "External id": 294241,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367898141.280, "dur": 61.288, + "args": { + "External id": 294242,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367898216.487, "dur": 55.353, + "args": { + "External id": 294243,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367898281.068, "dur": 22.383, + "args": { + "External id": 294244,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367898307.844, "dur": 29.536, + "args": { + "External id": 294245,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367898346.124, "dur": 20.557, + "args": { + "External id": 294246,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367898374.139, "dur": 29.158, + "args": { + "External id": 294247,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5333367898426.758, "dur": 24.275, + "args": { + "External id": 294248,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5333367898471.718, "dur": 29.791, + "args": { + "External id": 294249,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333367898530.283, "dur": 20.267, + "args": { + "External id": 294250,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333367898570.207, "dur": 16.557, + "args": { + "External id": 294251,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5333367898602.787, "dur": 16.174, + "args": { + "External id": 294252,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367898736.908, "dur": 21.096, + "args": { + "External id": 294253,"Record function id": 0, "Ev Idx": 876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367898740.297, "dur": 16.587, + "args": { + "External id": 294254,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367898744.985, "dur": 11.049, + "args": { + "External id": 294255,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367898746.588, "dur": 9.360, + "args": { + "External id": 294256,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367898761.977, "dur": 4.921, + "args": { + "External id": 294257,"Record function id": 0, "Ev Idx": 880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367898763.447, "dur": 3.004, + "args": { + "External id": 294258,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367898764.402, "dur": 1.523, + "args": { + "External id": 294259,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367898764.909, "dur": 0.933, + "args": { + "External id": 294260,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367898770.106, "dur": 4.645, + "args": { + "External id": 294261,"Record function id": 0, "Ev Idx": 884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367898771.221, "dur": 3.134, + "args": { + "External id": 294262,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367898771.899, "dur": 2.031, + "args": { + "External id": 294263,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367898772.949, "dur": 0.903, + "args": { + "External id": 294264,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367898777.873, "dur": 4.577, + "args": { + "External id": 294265,"Record function id": 0, "Ev Idx": 888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367898779.242, "dur": 2.827, + "args": { + "External id": 294266,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367898780.073, "dur": 1.447, + "args": { + "External id": 294267,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367898780.753, "dur": 0.635, + "args": { + "External id": 294268,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367898785.464, "dur": 3.095, + "args": { + "External id": 294269,"Record function id": 0, "Ev Idx": 892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367898786.370, "dur": 1.802, + "args": { + "External id": 294270,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367898786.800, "dur": 0.951, + "args": { + "External id": 294271,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367898787.099, "dur": 0.586, + "args": { + "External id": 294272,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367898791.578, "dur": 5.102, + "args": { + "External id": 294273,"Record function id": 0, "Ev Idx": 896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367898792.461, "dur": 3.813, + "args": { + "External id": 294274,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367898792.939, "dur": 2.683, + "args": { + "External id": 294275,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367898794.853, "dur": 0.665, + "args": { + "External id": 294276,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367898799.906, "dur": 3.663, + "args": { + "External id": 294277,"Record function id": 0, "Ev Idx": 900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367898801.026, "dur": 2.166, + "args": { + "External id": 294278,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367898801.455, "dur": 1.101, + "args": { + "External id": 294279,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367898801.765, "dur": 0.694, + "args": { + "External id": 294280,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367898806.519, "dur": 8.413, + "args": { + "External id": 294281,"Record function id": 0, "Ev Idx": 904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367898807.518, "dur": 7.004, + "args": { + "External id": 294282,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367898807.970, "dur": 5.882, + "args": { + "External id": 294283,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367898808.222, "dur": 2.328, + "args": { + "External id": 294284,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367898817.955, "dur": 3.063, + "args": { + "External id": 294285,"Record function id": 0, "Ev Idx": 908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367898818.875, "dur": 1.742, + "args": { + "External id": 294286,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367898819.297, "dur": 0.923, + "args": { + "External id": 294287,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367898819.691, "dur": 0.466, + "args": { + "External id": 294288,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333367898824.731, "dur": 37598.499, + "args": { + "External id": 294289,"Record function id": 0, "Sequence number": 1209213, "Fwd thread id": 1, "Ev Idx": 912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333367898826.350, "dur": 37586.926, + "args": { + "External id": 294290,"Sequence number": 1209213, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 913 + } + }, + { + "ph": "f", "id": 19, "pid": 2070552, "tid": 2107648, "ts": 5333367898826.350, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.26)", "pid": 2070552, "tid": 2107648, + "ts": 5333367898857.734, "dur": 40.310, + "args": { + "External id": 294291,"Record function id": 0, "Ev Idx": 914 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.26)", "pid": 2070552, "tid": 2107648, + "ts": 5333367898906.172, "dur": 65.728, + "args": { + "External id": 294292,"Record function id": 0, "Ev Idx": 915 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.26)", "pid": 2070552, "tid": 2107648, + "ts": 5333367898977.536, "dur": 37426.078, + "args": { + "External id": 294293,"Record function id": 0, "Ev Idx": 916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367899069.957, "dur": 7.392, + "args": { + "External id": 294294,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367899086.703, "dur": 4.822, + "args": { + "External id": 294295,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333367899107.478, "dur": 36342.667, + "args": { + "External id": 294296,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333367899120.937, "dur": 36316.528, + "args": { + "External id": 294297,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367899154.623, "dur": 35.011, + "args": { + "External id": 294298,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333367899198.210, "dur": 36194.186, + "args": { + "External id": 294299,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333367899202.646, "dur": 36188.659, + "args": { + "External id": 294300,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367899209.318, "dur": 7.240, + "args": { + "External id": 294301,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333367899218.856, "dur": 36167.610, + "args": { + "External id": 294302,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367935554.218, "dur": 11.190, + "args": { + "External id": 294303,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367935557.653, "dur": 7.402, + "args": { + "External id": 294304,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333367935596.017, "dur": 449.847, + "args": { + "External id": 294305,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333367935666.970, "dur": 373.597, + "args": { + "External id": 294306,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 929, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5333367935681.828, "dur": 353.459, + "args": { + "External id": 294307,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333367936067.246, "dur": 2.224, + "args": { + "External id": 294308,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 931, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367936133.188, "dur": 6.675, + "args": { + "External id": 294309,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367936214.922, "dur": 2.645, + "args": { + "External id": 294310,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367936239.371, "dur": 1.856, + "args": { + "External id": 294311,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367936257.487, "dur": 1.285, + "args": { + "External id": 294312,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367936278.473, "dur": 1.020, + "args": { + "External id": 294313,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367936292.623, "dur": 1.010, + "args": { + "External id": 294314,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367936306.682, "dur": 1.005, + "args": { + "External id": 294315,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367936318.561, "dur": 3.612, + "args": { + "External id": 294316,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367936332.261, "dur": 1.007, + "args": { + "External id": 294317,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333367936440.250, "dur": 2940.293, + "args": { + "External id": 294318,"Record function id": 0, "Ev Idx": 941 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.25)", "pid": 2070552, "tid": 2107648, + "ts": 5333367936462.192, "dur": 1116.510, + "args": { + "External id": 294319,"Record function id": 0, "Ev Idx": 942 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.25)", "pid": 2070552, "tid": 2107648, + "ts": 5333367936477.114, "dur": 369.676, + "args": { + "External id": 294320,"Record function id": 0, "Ev Idx": 943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367936561.006, "dur": 3.947, + "args": { + "External id": 294321,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367936568.345, "dur": 1.220, + "args": { + "External id": 294322,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367936571.310, "dur": 0.832, + "args": { + "External id": 294323,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367936574.785, "dur": 1.088, + "args": { + "External id": 294324,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367936577.154, "dur": 1.135, + "args": { + "External id": 294325,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367936579.801, "dur": 0.771, + "args": { + "External id": 294326,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367936582.027, "dur": 2.652, + "args": { + "External id": 294327,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367936587.284, "dur": 2.845, + "args": { + "External id": 294328,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367936591.661, "dur": 1.168, + "args": { + "External id": 294329,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367936594.392, "dur": 1.247, + "args": { + "External id": 294330,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333367936613.020, "dur": 201.251, + "args": { + "External id": 294331,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333367936665.984, "dur": 143.068, + "args": { + "External id": 294332,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367936685.901, "dur": 14.064, + "args": { + "External id": 294333,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333367936703.925, "dur": 75.887, + "args": { + "External id": 294334,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333367936706.663, "dur": 72.861, + "args": { + "External id": 294335,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367936712.163, "dur": 8.824, + "args": { + "External id": 294336,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333367936722.722, "dur": 56.301, + "args": { + "External id": 294337,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 960 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.24", "pid": 2070552, "tid": 2107648, + "ts": 5333367936940.212, "dur": 630.700, + "args": { + "External id": 294338,"Record function id": 0, "Ev Idx": 961 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.24)", "pid": 2070552, "tid": 2107648, + "ts": 5333367936958.099, "dur": 599.444, + "args": { + "External id": 294339,"Record function id": 0, "Ev Idx": 962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367937019.201, "dur": 4.629, + "args": { + "External id": 294340,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333367937039.662, "dur": 34.865, + "args": { + "External id": 294341,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367937044.544, "dur": 1.876, + "args": { + "External id": 294342,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367937048.503, "dur": 2.266, + "args": { + "External id": 294343,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367937052.587, "dur": 2.481, + "args": { + "External id": 294344,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367937056.278, "dur": 0.318, + "args": { + "External id": 294345,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367937059.272, "dur": 0.606, + "args": { + "External id": 294346,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367937061.187, "dur": 0.366, + "args": { + "External id": 294347,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367937062.909, "dur": 0.298, + "args": { + "External id": 294348,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367937065.952, "dur": 0.568, + "args": { + "External id": 294349,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367937067.989, "dur": 0.468, + "args": { + "External id": 294350,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333367937084.570, "dur": 29.980, + "args": { + "External id": 294351,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5333367937146.710, "dur": 157.518, + "args": { + "External id": 294352,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367937156.337, "dur": 2.797, + "args": { + "External id": 294353,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5333367937164.039, "dur": 34.933, + "args": { + "External id": 294354,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5333367937187.800, "dur": 10.595, + "args": { + "External id": 294355,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367937192.628, "dur": 3.255, + "args": { + "External id": 294356,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333367937209.852, "dur": 36.455, + "args": { + "External id": 294357,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367937212.897, "dur": 0.901, + "args": { + "External id": 294358,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367937217.332, "dur": 0.459, + "args": { + "External id": 294359,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367937219.653, "dur": 0.627, + "args": { + "External id": 294360,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367937221.845, "dur": 1.680, + "args": { + "External id": 294361,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367937225.179, "dur": 0.843, + "args": { + "External id": 294362,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367937227.910, "dur": 0.697, + "args": { + "External id": 294363,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367937232.662, "dur": 0.449, + "args": { + "External id": 294364,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367937235.197, "dur": 2.642, + "args": { + "External id": 294365,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367937239.843, "dur": 0.421, + "args": { + "External id": 294366,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333367937260.190, "dur": 29.955, + "args": { + "External id": 294367,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333367937354.378, "dur": 134.075, + "args": { + "External id": 294368,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333367937385.658, "dur": 99.303, + "args": { + "External id": 294369,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 992, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5333367937395.871, "dur": 84.829, + "args": { + "External id": 294370,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333367937504.058, "dur": 1.659, + "args": { + "External id": 294371,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 994, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333367937586.200, "dur": 1767.321, + "args": { + "External id": 294372,"Sequence number": 1209212, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 995 + } + }, + { + "ph": "f", "id": 20, "pid": 2070552, "tid": 2107648, "ts": 5333367937586.200, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367937740.246, "dur": 113.766, + "args": { + "External id": 294373,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5333367937900.085, "dur": 44.234, + "args": { + "External id": 294374,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5333367937962.791, "dur": 54.561, + "args": { + "External id": 294375,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367938027.264, "dur": 32.630, + "args": { + "External id": 294376,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367938065.945, "dur": 45.118, + "args": { + "External id": 294377,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367938116.944, "dur": 27.997, + "args": { + "External id": 294378,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367938154.587, "dur": 66.748, + "args": { + "External id": 294379,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5333367938258.554, "dur": 30.032, + "args": { + "External id": 294380,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 1003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5333367938307.954, "dur": 28.302, + "args": { + "External id": 294381,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333367938358.999, "dur": 17.835, + "args": { + "External id": 294382,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333367938387.719, "dur": 15.203, + "args": { + "External id": 294383,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367938415.624, "dur": 33.468, + "args": { + "External id": 294384,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367938452.490, "dur": 32.357, + "args": { + "External id": 294385,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5333367938515.060, "dur": 215.915, + "args": { + "External id": 294386,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367938598.103, "dur": 6.310, + "args": { + "External id": 294387,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367938606.395, "dur": 2.276, + "args": { + "External id": 294388,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333367938770.210, "dur": 38.182, + "args": { + "External id": 294389,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333367938826.030, "dur": 18.028, + "args": { + "External id": 294390,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367938858.964, "dur": 48.334, + "args": { + "External id": 294391,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367938913.404, "dur": 36.241, + "args": { + "External id": 294392,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367938962.588, "dur": 19.860, + "args": { + "External id": 294393,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367938986.748, "dur": 33.594, + "args": { + "External id": 294394,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367939027.545, "dur": 19.044, + "args": { + "External id": 294395,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367939053.435, "dur": 31.798, + "args": { + "External id": 294396,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5333367939102.481, "dur": 22.216, + "args": { + "External id": 294397,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 1020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5333367939141.303, "dur": 24.185, + "args": { + "External id": 294398,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333367939205.154, "dur": 24.963, + "args": { + "External id": 294399,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333367939251.489, "dur": 20.538, + "args": { + "External id": 294400,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5333367939306.052, "dur": 15.982, + "args": { + "External id": 294401,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 1024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367939403.598, "dur": 17.098, + "args": { + "External id": 294402,"Record function id": 0, "Ev Idx": 1025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367939406.874, "dur": 12.739, + "args": { + "External id": 294403,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367939411.243, "dur": 7.308, + "args": { + "External id": 294404,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367939412.449, "dur": 6.009, + "args": { + "External id": 294405,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367939424.356, "dur": 4.384, + "args": { + "External id": 294406,"Record function id": 0, "Ev Idx": 1029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367939425.718, "dur": 2.554, + "args": { + "External id": 294407,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367939426.684, "dur": 1.137, + "args": { + "External id": 294408,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367939426.973, "dur": 0.758, + "args": { + "External id": 294409,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367939431.971, "dur": 4.007, + "args": { + "External id": 294410,"Record function id": 0, "Ev Idx": 1033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367939433.140, "dur": 2.433, + "args": { + "External id": 294411,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367939433.994, "dur": 1.090, + "args": { + "External id": 294412,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367939434.355, "dur": 0.654, + "args": { + "External id": 294413,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367939439.322, "dur": 4.058, + "args": { + "External id": 294414,"Record function id": 0, "Ev Idx": 1037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367939440.496, "dur": 2.483, + "args": { + "External id": 294415,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367939441.251, "dur": 1.062, + "args": { + "External id": 294416,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367939441.576, "dur": 0.667, + "args": { + "External id": 294417,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367939446.831, "dur": 3.950, + "args": { + "External id": 294418,"Record function id": 0, "Ev Idx": 1041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367939448.004, "dur": 2.355, + "args": { + "External id": 294419,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367939448.515, "dur": 1.267, + "args": { + "External id": 294420,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367939449.051, "dur": 0.658, + "args": { + "External id": 294421,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367939454.013, "dur": 4.653, + "args": { + "External id": 294422,"Record function id": 0, "Ev Idx": 1045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367939455.253, "dur": 2.993, + "args": { + "External id": 294423,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367939456.044, "dur": 1.512, + "args": { + "External id": 294424,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367939456.811, "dur": 0.676, + "args": { + "External id": 294425,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367939461.843, "dur": 4.501, + "args": { + "External id": 294426,"Record function id": 0, "Ev Idx": 1049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367939463.004, "dur": 2.942, + "args": { + "External id": 294427,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367939463.938, "dur": 1.495, + "args": { + "External id": 294428,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367939464.682, "dur": 0.674, + "args": { + "External id": 294429,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367939469.361, "dur": 6.122, + "args": { + "External id": 294430,"Record function id": 0, "Ev Idx": 1053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367939470.353, "dur": 4.702, + "args": { + "External id": 294431,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367939471.128, "dur": 3.341, + "args": { + "External id": 294432,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367939471.606, "dur": 2.797, + "args": { + "External id": 294433,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367939478.457, "dur": 4.497, + "args": { + "External id": 294434,"Record function id": 0, "Ev Idx": 1057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367939479.512, "dur": 2.989, + "args": { + "External id": 294435,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367939480.313, "dur": 1.515, + "args": { + "External id": 294436,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367939481.027, "dur": 0.728, + "args": { + "External id": 294437,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333367939488.463, "dur": 36940.013, + "args": { + "External id": 294438,"Record function id": 0, "Sequence number": 1209211, "Fwd thread id": 1, "Ev Idx": 1061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333367939489.851, "dur": 36928.888, + "args": { + "External id": 294439,"Sequence number": 1209211, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1062 + } + }, + { + "ph": "f", "id": 21, "pid": 2070552, "tid": 2107648, "ts": 5333367939489.851, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.25)", "pid": 2070552, "tid": 2107648, + "ts": 5333367939522.477, "dur": 41.399, + "args": { + "External id": 294440,"Record function id": 0, "Ev Idx": 1063 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.25)", "pid": 2070552, "tid": 2107648, + "ts": 5333367939571.235, "dur": 112.614, + "args": { + "External id": 294441,"Record function id": 0, "Ev Idx": 1064 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.25)", "pid": 2070552, "tid": 2107648, + "ts": 5333367939692.021, "dur": 36718.629, + "args": { + "External id": 294442,"Record function id": 0, "Ev Idx": 1065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367939783.771, "dur": 8.078, + "args": { + "External id": 294443,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367939802.646, "dur": 5.065, + "args": { + "External id": 294444,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333367939823.513, "dur": 35714.763, + "args": { + "External id": 294445,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333367939837.199, "dur": 35690.051, + "args": { + "External id": 294446,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367939875.921, "dur": 14.664, + "args": { + "External id": 294447,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333367939896.765, "dur": 35592.178, + "args": { + "External id": 294448,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 1071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333367939899.375, "dur": 35588.696, + "args": { + "External id": 294449,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 1072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367939904.757, "dur": 5.142, + "args": { + "External id": 294450,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333367939911.684, "dur": 35571.844, + "args": { + "External id": 294451,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 1074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367975670.080, "dur": 15.089, + "args": { + "External id": 294452,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 1075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367975676.931, "dur": 7.691, + "args": { + "External id": 294453,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333367975716.084, "dur": 357.091, + "args": { + "External id": 294454,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 1077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333367975742.467, "dur": 325.536, + "args": { + "External id": 294455,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1078, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5333367975755.980, "dur": 306.960, + "args": { + "External id": 294456,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 1079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333367976090.658, "dur": 2.412, + "args": { + "External id": 294457,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1080, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367976153.010, "dur": 6.782, + "args": { + "External id": 294458,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367976236.915, "dur": 3.182, + "args": { + "External id": 294459,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367976257.805, "dur": 1.577, + "args": { + "External id": 294460,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367976272.657, "dur": 0.874, + "args": { + "External id": 294461,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367976285.401, "dur": 0.907, + "args": { + "External id": 294462,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367976302.396, "dur": 0.835, + "args": { + "External id": 294463,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367976313.562, "dur": 1.146, + "args": { + "External id": 294464,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367976326.249, "dur": 2.166, + "args": { + "External id": 294465,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367976338.398, "dur": 0.904, + "args": { + "External id": 294466,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333367976445.135, "dur": 2885.931, + "args": { + "External id": 294467,"Record function id": 0, "Ev Idx": 1090 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.24)", "pid": 2070552, "tid": 2107648, + "ts": 5333367976466.364, "dur": 1086.482, + "args": { + "External id": 294468,"Record function id": 0, "Ev Idx": 1091 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.24)", "pid": 2070552, "tid": 2107648, + "ts": 5333367976480.965, "dur": 363.997, + "args": { + "External id": 294469,"Record function id": 0, "Ev Idx": 1092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367976566.070, "dur": 4.051, + "args": { + "External id": 294470,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367976573.214, "dur": 0.992, + "args": { + "External id": 294471,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367976575.970, "dur": 1.003, + "args": { + "External id": 294472,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367976578.459, "dur": 0.866, + "args": { + "External id": 294473,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367976581.335, "dur": 1.130, + "args": { + "External id": 294474,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367976584.174, "dur": 0.891, + "args": { + "External id": 294475,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367976586.831, "dur": 1.531, + "args": { + "External id": 294476,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367976589.904, "dur": 3.288, + "args": { + "External id": 294477,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367976595.005, "dur": 0.849, + "args": { + "External id": 294478,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367976597.793, "dur": 0.772, + "args": { + "External id": 294479,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333367976616.723, "dur": 196.373, + "args": { + "External id": 294480,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333367976669.629, "dur": 138.660, + "args": { + "External id": 294481,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367976687.349, "dur": 13.822, + "args": { + "External id": 294482,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333367976704.966, "dur": 75.360, + "args": { + "External id": 294483,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 1106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333367976707.442, "dur": 72.531, + "args": { + "External id": 294484,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 1107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367976713.773, "dur": 7.289, + "args": { + "External id": 294485,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333367976722.854, "dur": 56.517, + "args": { + "External id": 294486,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 1109 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.23", "pid": 2070552, "tid": 2107648, + "ts": 5333367976935.116, "dur": 610.563, + "args": { + "External id": 294487,"Record function id": 0, "Ev Idx": 1110 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.23)", "pid": 2070552, "tid": 2107648, + "ts": 5333367976952.011, "dur": 580.498, + "args": { + "External id": 294488,"Record function id": 0, "Ev Idx": 1111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367977014.110, "dur": 4.534, + "args": { + "External id": 294489,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333367977034.838, "dur": 29.236, + "args": { + "External id": 294490,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367977039.525, "dur": 1.515, + "args": { + "External id": 294491,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367977042.522, "dur": 1.671, + "args": { + "External id": 294492,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367977044.995, "dur": 2.193, + "args": { + "External id": 294493,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367977048.059, "dur": 0.295, + "args": { + "External id": 294494,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367977051.510, "dur": 0.551, + "args": { + "External id": 294495,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367977053.162, "dur": 0.625, + "args": { + "External id": 294496,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367977054.358, "dur": 0.644, + "args": { + "External id": 294497,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367977057.721, "dur": 0.607, + "args": { + "External id": 294498,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367977059.034, "dur": 0.572, + "args": { + "External id": 294499,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333367977072.905, "dur": 30.427, + "args": { + "External id": 294500,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5333367977133.249, "dur": 156.022, + "args": { + "External id": 294501,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 1124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367977142.422, "dur": 3.226, + "args": { + "External id": 294502,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5333367977150.775, "dur": 11.951, + "args": { + "External id": 294503,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5333367977155.094, "dur": 7.212, + "args": { + "External id": 294504,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 1127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367977158.382, "dur": 2.767, + "args": { + "External id": 294505,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333367977190.816, "dur": 38.735, + "args": { + "External id": 294506,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367977193.492, "dur": 0.871, + "args": { + "External id": 294507,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367977198.202, "dur": 1.200, + "args": { + "External id": 294508,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367977201.708, "dur": 0.398, + "args": { + "External id": 294509,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367977204.232, "dur": 2.162, + "args": { + "External id": 294510,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367977208.351, "dur": 0.683, + "args": { + "External id": 294511,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367977211.536, "dur": 1.021, + "args": { + "External id": 294512,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367977217.162, "dur": 0.568, + "args": { + "External id": 294513,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367977219.791, "dur": 2.233, + "args": { + "External id": 294514,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367977224.057, "dur": 0.694, + "args": { + "External id": 294515,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333367977249.260, "dur": 29.175, + "args": { + "External id": 294516,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333367977341.992, "dur": 120.187, + "args": { + "External id": 294517,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 1140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333367977369.558, "dur": 89.380, + "args": { + "External id": 294518,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1141, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5333367977379.034, "dur": 75.586, + "args": { + "External id": 294519,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 1142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333367977478.430, "dur": 1.688, + "args": { + "External id": 294520,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1143, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333367977560.239, "dur": 1743.706, + "args": { + "External id": 294521,"Sequence number": 1209210, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1144 + } + }, + { + "ph": "f", "id": 22, "pid": 2070552, "tid": 2107648, "ts": 5333367977560.239, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367977716.227, "dur": 110.182, + "args": { + "External id": 294522,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 1145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5333367977868.679, "dur": 40.462, + "args": { + "External id": 294523,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 1146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5333367977931.578, "dur": 49.388, + "args": { + "External id": 294524,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 1147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367977993.526, "dur": 33.544, + "args": { + "External id": 294525,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367978033.310, "dur": 48.778, + "args": { + "External id": 294526,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367978089.978, "dur": 29.515, + "args": { + "External id": 294527,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367978129.755, "dur": 64.518, + "args": { + "External id": 294528,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5333367978228.297, "dur": 35.307, + "args": { + "External id": 294529,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 1152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5333367978292.450, "dur": 28.216, + "args": { + "External id": 294530,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333367978341.950, "dur": 18.462, + "args": { + "External id": 294531,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333367978373.557, "dur": 13.182, + "args": { + "External id": 294532,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367978398.795, "dur": 34.345, + "args": { + "External id": 294533,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367978436.460, "dur": 34.077, + "args": { + "External id": 294534,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5333367978500.984, "dur": 209.457, + "args": { + "External id": 294535,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367978579.699, "dur": 5.716, + "args": { + "External id": 294536,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367978587.367, "dur": 2.040, + "args": { + "External id": 294537,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333367978749.227, "dur": 26.024, + "args": { + "External id": 294538,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333367978789.567, "dur": 14.004, + "args": { + "External id": 294539,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367978813.475, "dur": 43.273, + "args": { + "External id": 294540,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367978877.159, "dur": 48.812, + "args": { + "External id": 294541,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367978935.464, "dur": 23.291, + "args": { + "External id": 294542,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367978962.914, "dur": 32.646, + "args": { + "External id": 294543,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367979000.696, "dur": 19.901, + "args": { + "External id": 294544,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333367979029.505, "dur": 34.516, + "args": { + "External id": 294545,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5333367979084.358, "dur": 21.571, + "args": { + "External id": 294546,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 1169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5333367979122.452, "dur": 24.012, + "args": { + "External id": 294547,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333367979160.513, "dur": 36.371, + "args": { + "External id": 294548,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333367979220.947, "dur": 20.036, + "args": { + "External id": 294549,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5333367979256.957, "dur": 16.632, + "args": { + "External id": 294550,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 1173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367979353.677, "dur": 16.173, + "args": { + "External id": 294551,"Record function id": 0, "Ev Idx": 1174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367979357.262, "dur": 11.546, + "args": { + "External id": 294552,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367979361.853, "dur": 6.011, + "args": { + "External id": 294553,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367979363.324, "dur": 4.425, + "args": { + "External id": 294554,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367979373.564, "dur": 7.714, + "args": { + "External id": 294555,"Record function id": 0, "Ev Idx": 1178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367979375.244, "dur": 5.584, + "args": { + "External id": 294556,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367979376.157, "dur": 4.205, + "args": { + "External id": 294557,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367979376.557, "dur": 3.690, + "args": { + "External id": 294558,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367979384.466, "dur": 4.769, + "args": { + "External id": 294559,"Record function id": 0, "Ev Idx": 1182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367979385.794, "dur": 3.025, + "args": { + "External id": 294560,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367979386.631, "dur": 1.743, + "args": { + "External id": 294561,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367979387.186, "dur": 1.097, + "args": { + "External id": 294562,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367979392.338, "dur": 4.047, + "args": { + "External id": 294563,"Record function id": 0, "Ev Idx": 1186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367979393.662, "dur": 2.278, + "args": { + "External id": 294564,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367979394.293, "dur": 1.055, + "args": { + "External id": 294565,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367979394.633, "dur": 0.639, + "args": { + "External id": 294566,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367979399.370, "dur": 3.798, + "args": { + "External id": 294567,"Record function id": 0, "Ev Idx": 1190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367979400.608, "dur": 2.155, + "args": { + "External id": 294568,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367979401.090, "dur": 1.103, + "args": { + "External id": 294569,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367979401.484, "dur": 0.634, + "args": { + "External id": 294570,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367979406.144, "dur": 4.238, + "args": { + "External id": 294571,"Record function id": 0, "Ev Idx": 1194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367979407.268, "dur": 2.711, + "args": { + "External id": 294572,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367979408.023, "dur": 1.352, + "args": { + "External id": 294573,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367979408.681, "dur": 0.621, + "args": { + "External id": 294574,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367979413.519, "dur": 5.779, + "args": { + "External id": 294575,"Record function id": 0, "Ev Idx": 1198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367979414.717, "dur": 4.133, + "args": { + "External id": 294576,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367979416.897, "dur": 1.571, + "args": { + "External id": 294577,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367979417.731, "dur": 0.633, + "args": { + "External id": 294578,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367979422.299, "dur": 4.211, + "args": { + "External id": 294579,"Record function id": 0, "Ev Idx": 1202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367979423.531, "dur": 2.567, + "args": { + "External id": 294580,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367979424.239, "dur": 1.304, + "args": { + "External id": 294581,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367979424.809, "dur": 0.663, + "args": { + "External id": 294582,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367979431.971, "dur": 7.007, + "args": { + "External id": 294583,"Record function id": 0, "Ev Idx": 1206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333367979433.330, "dur": 5.204, + "args": { + "External id": 294584,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367979434.139, "dur": 3.990, + "args": { + "External id": 294585,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333367979434.876, "dur": 3.181, + "args": { + "External id": 294586,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333367979443.051, "dur": 36636.837, + "args": { + "External id": 294587,"Record function id": 0, "Sequence number": 1209209, "Fwd thread id": 1, "Ev Idx": 1210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333367979444.444, "dur": 36625.582, + "args": { + "External id": 294588,"Sequence number": 1209209, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1211 + } + }, + { + "ph": "f", "id": 23, "pid": 2070552, "tid": 2107648, "ts": 5333367979444.444, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.24)", "pid": 2070552, "tid": 2107648, + "ts": 5333367979476.405, "dur": 41.716, + "args": { + "External id": 294589,"Record function id": 0, "Ev Idx": 1212 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.24)", "pid": 2070552, "tid": 2107648, + "ts": 5333367979526.090, "dur": 66.979, + "args": { + "External id": 294590,"Record function id": 0, "Ev Idx": 1213 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.24)", "pid": 2070552, "tid": 2107648, + "ts": 5333367979598.771, "dur": 36463.630, + "args": { + "External id": 294591,"Record function id": 0, "Ev Idx": 1214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367979737.104, "dur": 7.938, + "args": { + "External id": 294592,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333367979755.729, "dur": 8.274, + "args": { + "External id": 294593,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333367979779.105, "dur": 35418.769, + "args": { + "External id": 294594,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333367979793.045, "dur": 35391.322, + "args": { + "External id": 294595,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333367979841.266, "dur": 14.711, + "args": { + "External id": 294596,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333367979862.696, "dur": 35263.246, + "args": { + "External id": 294597,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 1220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333367979865.252, "dur": 35259.523, + "args": { + "External id": 294598,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 1221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333367979869.193, "dur": 5.400, + "args": { + "External id": 294599,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333367979876.340, "dur": 35244.095, + "args": { + "External id": 294600,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 1223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368015318.605, "dur": 11.741, + "args": { + "External id": 294601,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 1224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368015321.943, "dur": 7.958, + "args": { + "External id": 294602,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368015363.937, "dur": 393.772, + "args": { + "External id": 294603,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 1226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368015393.231, "dur": 358.968, + "args": { + "External id": 294604,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1227, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368015404.788, "dur": 341.074, + "args": { + "External id": 294605,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 1228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368015780.859, "dur": 2.161, + "args": { + "External id": 294606,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1229, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368015847.295, "dur": 7.036, + "args": { + "External id": 294607,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368015900.158, "dur": 3.307, + "args": { + "External id": 294608,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368015918.022, "dur": 1.153, + "args": { + "External id": 294609,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368015932.097, "dur": 0.955, + "args": { + "External id": 294610,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368015947.155, "dur": 0.839, + "args": { + "External id": 294611,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368015958.645, "dur": 2.782, + "args": { + "External id": 294612,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368015973.364, "dur": 0.903, + "args": { + "External id": 294613,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368015986.235, "dur": 2.462, + "args": { + "External id": 294614,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368015998.632, "dur": 0.927, + "args": { + "External id": 294615,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368016094.995, "dur": 2914.268, + "args": { + "External id": 294616,"Record function id": 0, "Ev Idx": 1239 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.23)", "pid": 2070552, "tid": 2107648, + "ts": 5333368016116.055, "dur": 1123.015, + "args": { + "External id": 294617,"Record function id": 0, "Ev Idx": 1240 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.23)", "pid": 2070552, "tid": 2107648, + "ts": 5333368016132.353, "dur": 354.130, + "args": { + "External id": 294618,"Record function id": 0, "Ev Idx": 1241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368016239.537, "dur": 6.292, + "args": { + "External id": 294619,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368016249.459, "dur": 1.012, + "args": { + "External id": 294620,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368016252.592, "dur": 0.871, + "args": { + "External id": 294621,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368016255.738, "dur": 0.678, + "args": { + "External id": 294622,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368016258.009, "dur": 0.918, + "args": { + "External id": 294623,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368016260.361, "dur": 1.026, + "args": { + "External id": 294624,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368016263.538, "dur": 2.234, + "args": { + "External id": 294625,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368016267.533, "dur": 1.144, + "args": { + "External id": 294626,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368016270.420, "dur": 2.152, + "args": { + "External id": 294627,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368016274.399, "dur": 0.954, + "args": { + "External id": 294628,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368016295.553, "dur": 161.758, + "args": { + "External id": 294629,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368016312.804, "dur": 139.657, + "args": { + "External id": 294630,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368016328.979, "dur": 13.162, + "args": { + "External id": 294631,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368016346.232, "dur": 79.090, + "args": { + "External id": 294632,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 1255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368016349.120, "dur": 75.808, + "args": { + "External id": 294633,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 1256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368016357.908, "dur": 7.540, + "args": { + "External id": 294634,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368016367.361, "dur": 56.889, + "args": { + "External id": 294635,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 1258 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.22", "pid": 2070552, "tid": 2107648, + "ts": 5333368016581.655, "dur": 646.982, + "args": { + "External id": 294636,"Record function id": 0, "Ev Idx": 1259 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.22)", "pid": 2070552, "tid": 2107648, + "ts": 5333368016601.991, "dur": 608.506, + "args": { + "External id": 294637,"Record function id": 0, "Ev Idx": 1260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368016705.865, "dur": 6.465, + "args": { + "External id": 294638,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368016729.901, "dur": 34.487, + "args": { + "External id": 294639,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368016735.136, "dur": 1.919, + "args": { + "External id": 294640,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368016739.498, "dur": 1.348, + "args": { + "External id": 294641,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368016742.465, "dur": 0.498, + "args": { + "External id": 294642,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368016744.571, "dur": 2.141, + "args": { + "External id": 294643,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368016749.277, "dur": 0.404, + "args": { + "External id": 294644,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368016751.237, "dur": 0.348, + "args": { + "External id": 294645,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368016753.519, "dur": 0.520, + "args": { + "External id": 294646,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368016756.460, "dur": 0.578, + "args": { + "External id": 294647,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368016758.748, "dur": 0.567, + "args": { + "External id": 294648,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368016775.652, "dur": 36.746, + "args": { + "External id": 294649,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5333368016844.253, "dur": 104.804, + "args": { + "External id": 294650,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 1273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368016854.508, "dur": 3.259, + "args": { + "External id": 294651,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5333368016862.773, "dur": 10.555, + "args": { + "External id": 294652,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5333368016867.373, "dur": 5.533, + "args": { + "External id": 294653,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 1276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368016870.953, "dur": 0.754, + "args": { + "External id": 294654,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368016880.220, "dur": 30.381, + "args": { + "External id": 294655,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368016882.584, "dur": 2.453, + "args": { + "External id": 294656,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368016886.788, "dur": 0.767, + "args": { + "External id": 294657,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368016888.971, "dur": 0.759, + "args": { + "External id": 294658,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368016891.436, "dur": 1.124, + "args": { + "External id": 294659,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368016894.174, "dur": 0.567, + "args": { + "External id": 294660,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368016896.207, "dur": 0.298, + "args": { + "External id": 294661,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368016899.109, "dur": 0.354, + "args": { + "External id": 294662,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368016901.334, "dur": 0.425, + "args": { + "External id": 294663,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368016903.169, "dur": 2.350, + "args": { + "External id": 294664,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368016921.335, "dur": 19.583, + "args": { + "External id": 294665,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368016992.741, "dur": 123.284, + "args": { + "External id": 294666,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 1289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368017020.752, "dur": 88.609, + "args": { + "External id": 294667,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1290, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368017030.465, "dur": 74.434, + "args": { + "External id": 294668,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 1291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368017132.160, "dur": 1.776, + "args": { + "External id": 294669,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1292, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368017249.309, "dur": 1737.166, + "args": { + "External id": 294670,"Sequence number": 1209208, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1293 + } + }, + { + "ph": "f", "id": 24, "pid": 2070552, "tid": 2107648, "ts": 5333368017249.309, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368017378.843, "dur": 113.750, + "args": { + "External id": 294671,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 1294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368017542.631, "dur": 44.058, + "args": { + "External id": 294672,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 1295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5333368017602.535, "dur": 94.948, + "args": { + "External id": 294673,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 1296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368017713.839, "dur": 35.666, + "args": { + "External id": 294674,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368017756.141, "dur": 45.439, + "args": { + "External id": 294675,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368017810.611, "dur": 27.749, + "args": { + "External id": 294676,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368017845.877, "dur": 43.467, + "args": { + "External id": 294677,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368017916.175, "dur": 25.128, + "args": { + "External id": 294678,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 1301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368017959.472, "dur": 28.365, + "args": { + "External id": 294679,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368018007.423, "dur": 18.640, + "args": { + "External id": 294680,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368018039.836, "dur": 15.731, + "args": { + "External id": 294681,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368018062.816, "dur": 29.352, + "args": { + "External id": 294682,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368018095.436, "dur": 31.065, + "args": { + "External id": 294683,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5333368018159.694, "dur": 219.346, + "args": { + "External id": 294684,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368018277.305, "dur": 8.382, + "args": { + "External id": 294685,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368018288.311, "dur": 4.812, + "args": { + "External id": 294686,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368018418.378, "dur": 26.268, + "args": { + "External id": 294687,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368018456.157, "dur": 15.006, + "args": { + "External id": 294688,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368018479.210, "dur": 45.465, + "args": { + "External id": 294689,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368018529.910, "dur": 52.446, + "args": { + "External id": 294690,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368018595.854, "dur": 25.503, + "args": { + "External id": 294691,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368018663.496, "dur": 38.923, + "args": { + "External id": 294692,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368018710.770, "dur": 20.568, + "args": { + "External id": 294693,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368018738.530, "dur": 34.922, + "args": { + "External id": 294694,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5333368018795.955, "dur": 27.002, + "args": { + "External id": 294695,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 1318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368018840.517, "dur": 28.606, + "args": { + "External id": 294696,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368018884.420, "dur": 15.727, + "args": { + "External id": 294697,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368018914.713, "dur": 15.233, + "args": { + "External id": 294698,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5333368018943.246, "dur": 14.491, + "args": { + "External id": 294699,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 1322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368019031.927, "dur": 15.659, + "args": { + "External id": 294700,"Record function id": 0, "Ev Idx": 1323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368019035.634, "dur": 10.944, + "args": { + "External id": 294701,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368019039.879, "dur": 5.758, + "args": { + "External id": 294702,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368019041.287, "dur": 4.209, + "args": { + "External id": 294703,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368019051.292, "dur": 5.651, + "args": { + "External id": 294704,"Record function id": 0, "Ev Idx": 1327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368019052.994, "dur": 3.524, + "args": { + "External id": 294705,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368019054.125, "dur": 1.829, + "args": { + "External id": 294706,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368019054.998, "dur": 0.886, + "args": { + "External id": 294707,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368019060.074, "dur": 4.746, + "args": { + "External id": 294708,"Record function id": 0, "Ev Idx": 1331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368019061.602, "dur": 2.814, + "args": { + "External id": 294709,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368019062.361, "dur": 1.369, + "args": { + "External id": 294710,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368019062.961, "dur": 0.649, + "args": { + "External id": 294711,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368019067.945, "dur": 4.511, + "args": { + "External id": 294712,"Record function id": 0, "Ev Idx": 1335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368019069.752, "dur": 2.288, + "args": { + "External id": 294713,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368019070.391, "dur": 1.041, + "args": { + "External id": 294714,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368019070.717, "dur": 0.631, + "args": { + "External id": 294715,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368019075.711, "dur": 4.436, + "args": { + "External id": 294716,"Record function id": 0, "Ev Idx": 1339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368019077.036, "dur": 2.701, + "args": { + "External id": 294717,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368019077.540, "dur": 1.604, + "args": { + "External id": 294718,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368019078.125, "dur": 0.931, + "args": { + "External id": 294719,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368019083.225, "dur": 5.098, + "args": { + "External id": 294720,"Record function id": 0, "Ev Idx": 1343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368019084.852, "dur": 3.042, + "args": { + "External id": 294721,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368019085.497, "dur": 1.803, + "args": { + "External id": 294722,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368019086.421, "dur": 0.816, + "args": { + "External id": 294723,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368019091.612, "dur": 6.813, + "args": { + "External id": 294724,"Record function id": 0, "Ev Idx": 1347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368019093.154, "dur": 4.854, + "args": { + "External id": 294725,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368019093.628, "dur": 3.961, + "args": { + "External id": 294726,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368019094.402, "dur": 3.116, + "args": { + "External id": 294727,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368019101.482, "dur": 4.062, + "args": { + "External id": 294728,"Record function id": 0, "Ev Idx": 1351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368019102.798, "dur": 2.331, + "args": { + "External id": 294729,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368019103.241, "dur": 1.481, + "args": { + "External id": 294730,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368019103.807, "dur": 0.840, + "args": { + "External id": 294731,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368019108.555, "dur": 4.270, + "args": { + "External id": 294732,"Record function id": 0, "Ev Idx": 1355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368019109.836, "dur": 2.596, + "args": { + "External id": 294733,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368019110.451, "dur": 1.435, + "args": { + "External id": 294734,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368019111.025, "dur": 0.786, + "args": { + "External id": 294735,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368019117.012, "dur": 38284.082, + "args": { + "External id": 294736,"Record function id": 0, "Sequence number": 1209207, "Fwd thread id": 1, "Ev Idx": 1359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368019118.732, "dur": 38272.587, + "args": { + "External id": 294737,"Sequence number": 1209207, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1360 + } + }, + { + "ph": "f", "id": 25, "pid": 2070552, "tid": 2107648, "ts": 5333368019118.732, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.23)", "pid": 2070552, "tid": 2107648, + "ts": 5333368019148.568, "dur": 68.573, + "args": { + "External id": 294738,"Record function id": 0, "Ev Idx": 1361 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.23)", "pid": 2070552, "tid": 2107648, + "ts": 5333368019229.766, "dur": 86.868, + "args": { + "External id": 294739,"Record function id": 0, "Ev Idx": 1362 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.23)", "pid": 2070552, "tid": 2107648, + "ts": 5333368019324.048, "dur": 38058.491, + "args": { + "External id": 294740,"Record function id": 0, "Ev Idx": 1363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368019421.355, "dur": 7.478, + "args": { + "External id": 294741,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368019440.025, "dur": 5.406, + "args": { + "External id": 294742,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368019459.984, "dur": 36970.487, + "args": { + "External id": 294743,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368019474.390, "dur": 36943.649, + "args": { + "External id": 294744,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368019523.773, "dur": 14.746, + "args": { + "External id": 294745,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368019545.408, "dur": 36833.394, + "args": { + "External id": 294746,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 1369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368019547.817, "dur": 36829.677, + "args": { + "External id": 294747,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 1370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368019552.158, "dur": 5.984, + "args": { + "External id": 294748,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368019560.306, "dur": 36812.776, + "args": { + "External id": 294749,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 1372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368056530.508, "dur": 11.142, + "args": { + "External id": 294750,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 1373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368056533.720, "dur": 7.503, + "args": { + "External id": 294751,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368056571.710, "dur": 443.016, + "args": { + "External id": 294752,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 1375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368056601.368, "dur": 407.763, + "args": { + "External id": 294753,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1376, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368056613.563, "dur": 389.428, + "args": { + "External id": 294754,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 1377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368057034.542, "dur": 2.733, + "args": { + "External id": 294755,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1378, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368057101.007, "dur": 7.567, + "args": { + "External id": 294756,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368057153.791, "dur": 1.496, + "args": { + "External id": 294757,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368057193.384, "dur": 2.750, + "args": { + "External id": 294758,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368057215.086, "dur": 1.527, + "args": { + "External id": 294759,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368057233.342, "dur": 3.267, + "args": { + "External id": 294760,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368057250.983, "dur": 1.218, + "args": { + "External id": 294761,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368057269.133, "dur": 1.416, + "args": { + "External id": 294762,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368057285.486, "dur": 3.287, + "args": { + "External id": 294763,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368057302.615, "dur": 3.607, + "args": { + "External id": 294764,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368057418.207, "dur": 2884.097, + "args": { + "External id": 294765,"Record function id": 0, "Ev Idx": 1388 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.22)", "pid": 2070552, "tid": 2107648, + "ts": 5333368057442.403, "dur": 1086.885, + "args": { + "External id": 294766,"Record function id": 0, "Ev Idx": 1389 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.22)", "pid": 2070552, "tid": 2107648, + "ts": 5333368057457.861, "dur": 367.281, + "args": { + "External id": 294767,"Record function id": 0, "Ev Idx": 1390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368057546.844, "dur": 4.488, + "args": { + "External id": 294768,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368057554.724, "dur": 1.624, + "args": { + "External id": 294769,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368057558.628, "dur": 1.236, + "args": { + "External id": 294770,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368057561.467, "dur": 1.316, + "args": { + "External id": 294771,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368057564.637, "dur": 1.286, + "args": { + "External id": 294772,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368057567.564, "dur": 1.330, + "args": { + "External id": 294773,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368057570.564, "dur": 3.947, + "args": { + "External id": 294774,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368057576.257, "dur": 1.022, + "args": { + "External id": 294775,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368057578.672, "dur": 1.103, + "args": { + "External id": 294776,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368057581.567, "dur": 1.278, + "args": { + "External id": 294777,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368057601.366, "dur": 190.837, + "args": { + "External id": 294778,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368057617.579, "dur": 169.392, + "args": { + "External id": 294779,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368057669.996, "dur": 13.332, + "args": { + "External id": 294780,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368057687.767, "dur": 71.780, + "args": { + "External id": 294781,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 1404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368057691.761, "dur": 67.404, + "args": { + "External id": 294782,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 1405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368057695.535, "dur": 7.154, + "args": { + "External id": 294783,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368057704.533, "dur": 54.124, + "args": { + "External id": 294784,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 1407 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.21", "pid": 2070552, "tid": 2107648, + "ts": 5333368057915.740, "dur": 605.033, + "args": { + "External id": 294785,"Record function id": 0, "Ev Idx": 1408 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.21)", "pid": 2070552, "tid": 2107648, + "ts": 5333368057932.900, "dur": 574.884, + "args": { + "External id": 294786,"Record function id": 0, "Ev Idx": 1409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368057998.581, "dur": 5.121, + "args": { + "External id": 294787,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368058019.535, "dur": 32.769, + "args": { + "External id": 294788,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368058024.020, "dur": 1.706, + "args": { + "External id": 294789,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368058028.707, "dur": 2.832, + "args": { + "External id": 294790,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368058032.475, "dur": 0.596, + "args": { + "External id": 294791,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368058033.664, "dur": 0.570, + "args": { + "External id": 294792,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368058035.758, "dur": 0.547, + "args": { + "External id": 294793,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368058038.038, "dur": 0.771, + "args": { + "External id": 294794,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368058040.413, "dur": 0.865, + "args": { + "External id": 294795,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368058043.149, "dur": 0.582, + "args": { + "External id": 294796,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368058045.377, "dur": 0.569, + "args": { + "External id": 294797,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368058064.266, "dur": 32.578, + "args": { + "External id": 294798,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5333368058129.447, "dur": 149.302, + "args": { + "External id": 294799,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 1422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368058139.688, "dur": 5.065, + "args": { + "External id": 294800,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5333368058149.637, "dur": 10.602, + "args": { + "External id": 294801,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5333368058153.903, "dur": 5.917, + "args": { + "External id": 294802,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 1425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368058158.032, "dur": 0.671, + "args": { + "External id": 294803,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368058189.828, "dur": 34.118, + "args": { + "External id": 294804,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368058192.735, "dur": 0.950, + "args": { + "External id": 294805,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368058195.942, "dur": 1.346, + "args": { + "External id": 294806,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368058198.925, "dur": 0.988, + "args": { + "External id": 294807,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368058201.541, "dur": 0.666, + "args": { + "External id": 294808,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368058204.935, "dur": 0.508, + "args": { + "External id": 294809,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368058206.938, "dur": 0.607, + "args": { + "External id": 294810,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368058209.092, "dur": 2.396, + "args": { + "External id": 294811,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368058214.678, "dur": 0.829, + "args": { + "External id": 294812,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368058217.173, "dur": 0.680, + "args": { + "External id": 294813,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368058241.049, "dur": 29.070, + "args": { + "External id": 294814,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368058328.768, "dur": 114.450, + "args": { + "External id": 294815,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 1438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368058351.073, "dur": 88.779, + "args": { + "External id": 294816,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1439, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368058360.012, "dur": 74.670, + "args": { + "External id": 294817,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 1440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368058454.597, "dur": 2.156, + "args": { + "External id": 294818,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1441, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368058536.255, "dur": 1740.472, + "args": { + "External id": 294819,"Sequence number": 1209206, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1442 + } + }, + { + "ph": "f", "id": 26, "pid": 2070552, "tid": 2107648, "ts": 5333368058536.255, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368058690.635, "dur": 108.047, + "args": { + "External id": 294820,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 1443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368058837.161, "dur": 39.104, + "args": { + "External id": 294821,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 1444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5333368058898.509, "dur": 50.345, + "args": { + "External id": 294822,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 1445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368058962.251, "dur": 32.189, + "args": { + "External id": 294823,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368059001.984, "dur": 48.501, + "args": { + "External id": 294824,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368059058.576, "dur": 31.847, + "args": { + "External id": 294825,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368059100.308, "dur": 44.702, + "args": { + "External id": 294826,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368059183.778, "dur": 37.752, + "args": { + "External id": 294827,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 1450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368059247.717, "dur": 33.936, + "args": { + "External id": 294828,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368059306.297, "dur": 19.123, + "args": { + "External id": 294829,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368059339.557, "dur": 15.615, + "args": { + "External id": 294830,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368059366.132, "dur": 36.265, + "args": { + "External id": 294831,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368059405.868, "dur": 36.151, + "args": { + "External id": 294832,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5333368059471.205, "dur": 204.708, + "args": { + "External id": 294833,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368059547.097, "dur": 6.393, + "args": { + "External id": 294834,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368059555.152, "dur": 4.816, + "args": { + "External id": 294835,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368059715.381, "dur": 41.884, + "args": { + "External id": 294836,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368059780.247, "dur": 19.660, + "args": { + "External id": 294837,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368059811.396, "dur": 48.228, + "args": { + "External id": 294838,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368059865.318, "dur": 41.201, + "args": { + "External id": 294839,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368059914.302, "dur": 20.453, + "args": { + "External id": 294840,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368059944.387, "dur": 32.337, + "args": { + "External id": 294841,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368059982.834, "dur": 19.453, + "args": { + "External id": 294842,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368060008.902, "dur": 29.298, + "args": { + "External id": 294843,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5333368060055.977, "dur": 21.767, + "args": { + "External id": 294844,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 1467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368060096.730, "dur": 25.010, + "args": { + "External id": 294845,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368060136.546, "dur": 16.920, + "args": { + "External id": 294846,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368060184.330, "dur": 22.926, + "args": { + "External id": 294847,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5333368060227.176, "dur": 17.232, + "args": { + "External id": 294848,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 1471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368060325.114, "dur": 16.144, + "args": { + "External id": 294849,"Record function id": 0, "Ev Idx": 1472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368060328.354, "dur": 11.777, + "args": { + "External id": 294850,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368060333.171, "dur": 5.923, + "args": { + "External id": 294851,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368060334.703, "dur": 4.297, + "args": { + "External id": 294852,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368060344.868, "dur": 4.647, + "args": { + "External id": 294853,"Record function id": 0, "Ev Idx": 1476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368060345.966, "dur": 3.122, + "args": { + "External id": 294854,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368060346.745, "dur": 1.746, + "args": { + "External id": 294855,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368060347.293, "dur": 1.080, + "args": { + "External id": 294856,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368060352.920, "dur": 4.639, + "args": { + "External id": 294857,"Record function id": 0, "Ev Idx": 1480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368060354.568, "dur": 2.583, + "args": { + "External id": 294858,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368060355.233, "dur": 1.295, + "args": { + "External id": 294859,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368060355.616, "dur": 0.822, + "args": { + "External id": 294860,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368060360.942, "dur": 3.567, + "args": { + "External id": 294861,"Record function id": 0, "Ev Idx": 1484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368060362.016, "dur": 2.060, + "args": { + "External id": 294862,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368060362.644, "dur": 1.008, + "args": { + "External id": 294863,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368060362.925, "dur": 0.659, + "args": { + "External id": 294864,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368060367.733, "dur": 4.144, + "args": { + "External id": 294865,"Record function id": 0, "Ev Idx": 1488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368060369.095, "dur": 2.370, + "args": { + "External id": 294866,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368060369.632, "dur": 1.317, + "args": { + "External id": 294867,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368060370.188, "dur": 0.652, + "args": { + "External id": 294868,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368060375.187, "dur": 6.655, + "args": { + "External id": 294869,"Record function id": 0, "Ev Idx": 1492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368060376.371, "dur": 5.055, + "args": { + "External id": 294870,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368060377.271, "dur": 3.594, + "args": { + "External id": 294871,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368060378.082, "dur": 2.713, + "args": { + "External id": 294872,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368060385.228, "dur": 5.455, + "args": { + "External id": 294873,"Record function id": 0, "Ev Idx": 1496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368060386.621, "dur": 3.658, + "args": { + "External id": 294874,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368060387.564, "dur": 2.164, + "args": { + "External id": 294875,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368060388.365, "dur": 1.289, + "args": { + "External id": 294876,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368060394.186, "dur": 4.530, + "args": { + "External id": 294877,"Record function id": 0, "Ev Idx": 1500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368060395.570, "dur": 2.738, + "args": { + "External id": 294878,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368060396.023, "dur": 1.749, + "args": { + "External id": 294879,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368060396.988, "dur": 0.713, + "args": { + "External id": 294880,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368060401.992, "dur": 3.863, + "args": { + "External id": 294881,"Record function id": 0, "Ev Idx": 1504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368060403.150, "dur": 2.298, + "args": { + "External id": 294882,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368060403.603, "dur": 1.465, + "args": { + "External id": 294883,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368060404.375, "dur": 0.619, + "args": { + "External id": 294884,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368060409.902, "dur": 35645.577, + "args": { + "External id": 294885,"Record function id": 0, "Sequence number": 1209205, "Fwd thread id": 1, "Ev Idx": 1508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368060411.282, "dur": 35635.158, + "args": { + "External id": 294886,"Sequence number": 1209205, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1509 + } + }, + { + "ph": "f", "id": 27, "pid": 2070552, "tid": 2107648, "ts": 5333368060411.282, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.22)", "pid": 2070552, "tid": 2107648, + "ts": 5333368060439.952, "dur": 41.589, + "args": { + "External id": 294887,"Record function id": 0, "Ev Idx": 1510 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.22)", "pid": 2070552, "tid": 2107648, + "ts": 5333368060489.174, "dur": 65.719, + "args": { + "External id": 294888,"Record function id": 0, "Ev Idx": 1511 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.22)", "pid": 2070552, "tid": 2107648, + "ts": 5333368060561.265, "dur": 35477.180, + "args": { + "External id": 294889,"Record function id": 0, "Ev Idx": 1512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368060687.007, "dur": 8.230, + "args": { + "External id": 294890,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368060706.513, "dur": 4.848, + "args": { + "External id": 294891,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368060728.688, "dur": 34419.664, + "args": { + "External id": 294892,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368060742.627, "dur": 34394.430, + "args": { + "External id": 294893,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368060785.704, "dur": 16.286, + "args": { + "External id": 294894,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368060808.552, "dur": 34292.425, + "args": { + "External id": 294895,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 1518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368060811.247, "dur": 34288.956, + "args": { + "External id": 294896,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 1519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368060815.281, "dur": 4.689, + "args": { + "External id": 294897,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368060821.577, "dur": 34274.450, + "args": { + "External id": 294898,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 1521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368095267.426, "dur": 12.929, + "args": { + "External id": 294899,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 1522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368095270.391, "dur": 9.463, + "args": { + "External id": 294900,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368095310.592, "dur": 410.495, + "args": { + "External id": 294901,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 1524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368095339.663, "dur": 376.190, + "args": { + "External id": 294902,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1525, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368095351.769, "dur": 357.548, + "args": { + "External id": 294903,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 1526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368095746.174, "dur": 2.196, + "args": { + "External id": 294904,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1527, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368095812.612, "dur": 7.179, + "args": { + "External id": 294905,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368095864.695, "dur": 1.080, + "args": { + "External id": 294906,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368095880.490, "dur": 1.265, + "args": { + "External id": 294907,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368095893.515, "dur": 1.220, + "args": { + "External id": 294908,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368095908.366, "dur": 1.698, + "args": { + "External id": 294909,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368095920.596, "dur": 0.649, + "args": { + "External id": 294910,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368095931.502, "dur": 1.555, + "args": { + "External id": 294911,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368095944.008, "dur": 2.346, + "args": { + "External id": 294912,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368095975.373, "dur": 0.830, + "args": { + "External id": 294913,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368096069.915, "dur": 2926.774, + "args": { + "External id": 294914,"Record function id": 0, "Ev Idx": 1537 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.21)", "pid": 2070552, "tid": 2107648, + "ts": 5333368096090.345, "dur": 1066.647, + "args": { + "External id": 294915,"Record function id": 0, "Ev Idx": 1538 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.21)", "pid": 2070552, "tid": 2107648, + "ts": 5333368096106.655, "dur": 360.364, + "args": { + "External id": 294916,"Record function id": 0, "Ev Idx": 1539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368096220.885, "dur": 5.916, + "args": { + "External id": 294917,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368096231.761, "dur": 1.170, + "args": { + "External id": 294918,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368096234.648, "dur": 1.203, + "args": { + "External id": 294919,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368096237.663, "dur": 0.695, + "args": { + "External id": 294920,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368096239.889, "dur": 1.102, + "args": { + "External id": 294921,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368096242.940, "dur": 2.827, + "args": { + "External id": 294922,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368096247.257, "dur": 2.074, + "args": { + "External id": 294923,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368096251.057, "dur": 0.997, + "args": { + "External id": 294924,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368096253.931, "dur": 1.245, + "args": { + "External id": 294925,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368096256.930, "dur": 0.547, + "args": { + "External id": 294926,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368096277.062, "dur": 158.778, + "args": { + "External id": 294927,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368096295.201, "dur": 135.897, + "args": { + "External id": 294928,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368096311.622, "dur": 12.636, + "args": { + "External id": 294929,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368096328.210, "dur": 73.978, + "args": { + "External id": 294930,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 1553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368096330.848, "dur": 70.979, + "args": { + "External id": 294931,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 1554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368096334.898, "dur": 7.271, + "args": { + "External id": 294932,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368096343.777, "dur": 57.507, + "args": { + "External id": 294933,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 1556 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.20", "pid": 2070552, "tid": 2107648, + "ts": 5333368096562.011, "dur": 588.089, + "args": { + "External id": 294934,"Record function id": 0, "Ev Idx": 1557 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.20)", "pid": 2070552, "tid": 2107648, + "ts": 5333368096577.657, "dur": 560.200, + "args": { + "External id": 294935,"Record function id": 0, "Ev Idx": 1558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368096679.783, "dur": 6.943, + "args": { + "External id": 294936,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368096704.015, "dur": 33.329, + "args": { + "External id": 294937,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368096709.526, "dur": 3.500, + "args": { + "External id": 294938,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368096714.866, "dur": 1.732, + "args": { + "External id": 294939,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368096718.212, "dur": 0.462, + "args": { + "External id": 294940,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368096719.654, "dur": 0.338, + "args": { + "External id": 294941,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368096722.627, "dur": 0.427, + "args": { + "External id": 294942,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368096723.784, "dur": 0.317, + "args": { + "External id": 294943,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368096725.855, "dur": 0.342, + "args": { + "External id": 294944,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368096728.006, "dur": 0.529, + "args": { + "External id": 294945,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368096730.189, "dur": 2.520, + "args": { + "External id": 294946,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368096748.331, "dur": 34.660, + "args": { + "External id": 294947,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5333368096812.700, "dur": 102.326, + "args": { + "External id": 294948,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 1571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368096822.698, "dur": 3.650, + "args": { + "External id": 294949,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5333368096831.013, "dur": 13.020, + "args": { + "External id": 294950,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5333368096838.651, "dur": 4.946, + "args": { + "External id": 294951,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 1574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368096841.852, "dur": 0.589, + "args": { + "External id": 294952,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368096850.557, "dur": 26.265, + "args": { + "External id": 294953,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368096852.180, "dur": 0.477, + "args": { + "External id": 294954,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368096854.862, "dur": 0.407, + "args": { + "External id": 294955,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368096856.814, "dur": 0.419, + "args": { + "External id": 294956,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368096858.029, "dur": 1.784, + "args": { + "External id": 294957,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368096861.232, "dur": 0.425, + "args": { + "External id": 294958,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368096862.800, "dur": 2.344, + "args": { + "External id": 294959,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368096868.036, "dur": 0.570, + "args": { + "External id": 294960,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368096869.346, "dur": 0.724, + "args": { + "External id": 294961,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368096871.691, "dur": 0.397, + "args": { + "External id": 294962,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368096886.853, "dur": 20.770, + "args": { + "External id": 294963,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368096959.136, "dur": 114.387, + "args": { + "External id": 294964,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 1587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368096984.713, "dur": 85.380, + "args": { + "External id": 294965,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1588, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368096993.709, "dur": 71.903, + "args": { + "External id": 294966,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 1589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368097089.348, "dur": 1.612, + "args": { + "External id": 294967,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1590, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368097164.191, "dur": 1803.204, + "args": { + "External id": 294968,"Sequence number": 1209204, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1591 + } + }, + { + "ph": "f", "id": 28, "pid": 2070552, "tid": 2107648, "ts": 5333368097164.191, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368097329.866, "dur": 120.801, + "args": { + "External id": 294969,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 1592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368097499.637, "dur": 40.779, + "args": { + "External id": 294970,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 1593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5333368097556.758, "dur": 49.058, + "args": { + "External id": 294971,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 1594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368097615.406, "dur": 76.144, + "args": { + "External id": 294972,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368097702.866, "dur": 47.499, + "args": { + "External id": 294973,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368097761.572, "dur": 29.640, + "args": { + "External id": 294974,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368097799.941, "dur": 42.822, + "args": { + "External id": 294975,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368097870.517, "dur": 26.398, + "args": { + "External id": 294976,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 1599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368097916.677, "dur": 29.040, + "args": { + "External id": 294977,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368097968.248, "dur": 18.755, + "args": { + "External id": 294978,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368098001.696, "dur": 15.205, + "args": { + "External id": 294979,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368098027.064, "dur": 29.954, + "args": { + "External id": 294980,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368098059.851, "dur": 32.220, + "args": { + "External id": 294981,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5333368098120.819, "dur": 209.842, + "args": { + "External id": 294982,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368098220.710, "dur": 9.001, + "args": { + "External id": 294983,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368098232.359, "dur": 8.762, + "args": { + "External id": 294984,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368098375.830, "dur": 29.642, + "args": { + "External id": 294985,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368098418.814, "dur": 13.987, + "args": { + "External id": 294986,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368098442.128, "dur": 47.598, + "args": { + "External id": 294987,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368098506.566, "dur": 51.499, + "args": { + "External id": 294988,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368098569.589, "dur": 21.170, + "args": { + "External id": 294989,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368098596.610, "dur": 65.548, + "args": { + "External id": 294990,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368098675.369, "dur": 24.244, + "args": { + "External id": 294991,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368098707.299, "dur": 30.491, + "args": { + "External id": 294992,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5333368098761.810, "dur": 25.738, + "args": { + "External id": 294993,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 1616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368098806.734, "dur": 27.471, + "args": { + "External id": 294994,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368098849.973, "dur": 20.358, + "args": { + "External id": 294995,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368098887.912, "dur": 16.088, + "args": { + "External id": 294996,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5333368098922.165, "dur": 16.152, + "args": { + "External id": 294997,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 1620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368099019.263, "dur": 15.599, + "args": { + "External id": 294998,"Record function id": 0, "Ev Idx": 1621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368099022.718, "dur": 10.991, + "args": { + "External id": 294999,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368099026.961, "dur": 5.821, + "args": { + "External id": 295000,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368099028.366, "dur": 4.322, + "args": { + "External id": 295001,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368099038.687, "dur": 7.730, + "args": { + "External id": 295002,"Record function id": 0, "Ev Idx": 1625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368099043.079, "dur": 2.871, + "args": { + "External id": 295003,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368099044.026, "dur": 1.224, + "args": { + "External id": 295004,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368099044.362, "dur": 0.761, + "args": { + "External id": 295005,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368099050.023, "dur": 4.307, + "args": { + "External id": 295006,"Record function id": 0, "Ev Idx": 1629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368099051.100, "dur": 2.821, + "args": { + "External id": 295007,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368099052.104, "dur": 1.388, + "args": { + "External id": 295008,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368099052.627, "dur": 0.792, + "args": { + "External id": 295009,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368099057.916, "dur": 3.991, + "args": { + "External id": 295010,"Record function id": 0, "Ev Idx": 1633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368099059.198, "dur": 2.261, + "args": { + "External id": 295011,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368099059.877, "dur": 0.963, + "args": { + "External id": 295012,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368099060.136, "dur": 0.628, + "args": { + "External id": 295013,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368099065.250, "dur": 6.696, + "args": { + "External id": 295014,"Record function id": 0, "Ev Idx": 1637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368099066.452, "dur": 5.083, + "args": { + "External id": 295015,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368099067.158, "dur": 3.989, + "args": { + "External id": 295016,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368099068.155, "dur": 2.926, + "args": { + "External id": 295017,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368099075.428, "dur": 4.656, + "args": { + "External id": 295018,"Record function id": 0, "Ev Idx": 1641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368099076.421, "dur": 3.240, + "args": { + "External id": 295019,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368099076.989, "dur": 2.113, + "args": { + "External id": 295020,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368099077.869, "dur": 1.129, + "args": { + "External id": 295021,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368099083.621, "dur": 4.326, + "args": { + "External id": 295022,"Record function id": 0, "Ev Idx": 1645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368099085.055, "dur": 2.482, + "args": { + "External id": 295023,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368099085.690, "dur": 1.437, + "args": { + "External id": 295024,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368099086.327, "dur": 0.730, + "args": { + "External id": 295025,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368099091.311, "dur": 4.419, + "args": { + "External id": 295026,"Record function id": 0, "Ev Idx": 1649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368099092.371, "dur": 2.948, + "args": { + "External id": 295027,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368099093.365, "dur": 1.551, + "args": { + "External id": 295028,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368099093.965, "dur": 0.872, + "args": { + "External id": 295029,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368099098.906, "dur": 7.255, + "args": { + "External id": 295030,"Record function id": 0, "Ev Idx": 1653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368099103.188, "dur": 2.537, + "args": { + "External id": 295031,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368099103.614, "dur": 1.494, + "args": { + "External id": 295032,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368099104.189, "dur": 0.845, + "args": { + "External id": 295033,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368099110.124, "dur": 36176.997, + "args": { + "External id": 295034,"Record function id": 0, "Sequence number": 1209203, "Fwd thread id": 1, "Ev Idx": 1657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368099111.609, "dur": 36165.773, + "args": { + "External id": 295035,"Sequence number": 1209203, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1658 + } + }, + { + "ph": "f", "id": 29, "pid": 2070552, "tid": 2107648, "ts": 5333368099111.609, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.21)", "pid": 2070552, "tid": 2107648, + "ts": 5333368099142.027, "dur": 64.612, + "args": { + "External id": 295036,"Record function id": 0, "Ev Idx": 1659 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.21)", "pid": 2070552, "tid": 2107648, + "ts": 5333368099218.970, "dur": 79.126, + "args": { + "External id": 295037,"Record function id": 0, "Ev Idx": 1660 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.21)", "pid": 2070552, "tid": 2107648, + "ts": 5333368099304.284, "dur": 35964.955, + "args": { + "External id": 295038,"Record function id": 0, "Ev Idx": 1661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368099400.131, "dur": 8.159, + "args": { + "External id": 295039,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368099418.251, "dur": 5.275, + "args": { + "External id": 295040,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368099439.341, "dur": 34955.119, + "args": { + "External id": 295041,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368099453.322, "dur": 34929.212, + "args": { + "External id": 295042,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368099497.036, "dur": 17.789, + "args": { + "External id": 295043,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368099521.283, "dur": 34815.148, + "args": { + "External id": 295044,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 1667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368099523.696, "dur": 34811.872, + "args": { + "External id": 295045,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 1668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368099528.422, "dur": 7.132, + "args": { + "External id": 295046,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368099537.099, "dur": 34794.010, + "args": { + "External id": 295047,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 1670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368134499.700, "dur": 11.088, + "args": { + "External id": 295048,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 1671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368134502.786, "dur": 7.634, + "args": { + "External id": 295049,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368134540.571, "dur": 392.502, + "args": { + "External id": 295050,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 1673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368134568.327, "dur": 359.633, + "args": { + "External id": 295051,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1674, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368134580.461, "dur": 341.697, + "args": { + "External id": 295052,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 1675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368134956.023, "dur": 2.319, + "args": { + "External id": 295053,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1676, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368135021.412, "dur": 6.821, + "args": { + "External id": 295054,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368135071.932, "dur": 1.375, + "args": { + "External id": 295055,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368135088.435, "dur": 1.230, + "args": { + "External id": 295056,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368135100.050, "dur": 2.880, + "args": { + "External id": 295057,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368135114.310, "dur": 0.846, + "args": { + "External id": 295058,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368135125.169, "dur": 0.940, + "args": { + "External id": 295059,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368135137.623, "dur": 0.930, + "args": { + "External id": 295060,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368135149.914, "dur": 4.191, + "args": { + "External id": 295061,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368135165.002, "dur": 20.234, + "args": { + "External id": 295062,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368135304.482, "dur": 2850.291, + "args": { + "External id": 295063,"Record function id": 0, "Ev Idx": 1686 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.20)", "pid": 2070552, "tid": 2107648, + "ts": 5333368135326.329, "dur": 1070.889, + "args": { + "External id": 295064,"Record function id": 0, "Ev Idx": 1687 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.20)", "pid": 2070552, "tid": 2107648, + "ts": 5333368135341.944, "dur": 372.022, + "args": { + "External id": 295065,"Record function id": 0, "Ev Idx": 1688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368135429.498, "dur": 4.331, + "args": { + "External id": 295066,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368135437.070, "dur": 1.128, + "args": { + "External id": 295067,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368135440.746, "dur": 0.867, + "args": { + "External id": 295068,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368135443.596, "dur": 1.015, + "args": { + "External id": 295069,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368135446.611, "dur": 2.475, + "args": { + "External id": 295070,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368135450.764, "dur": 1.215, + "args": { + "External id": 295071,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368135454.037, "dur": 1.693, + "args": { + "External id": 295072,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368135457.337, "dur": 0.731, + "args": { + "External id": 295073,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368135459.848, "dur": 0.935, + "args": { + "External id": 295074,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368135462.566, "dur": 0.604, + "args": { + "External id": 295075,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368135481.761, "dur": 199.402, + "args": { + "External id": 295076,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368135498.568, "dur": 176.881, + "args": { + "External id": 295077,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368135515.906, "dur": 12.566, + "args": { + "External id": 295078,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368135532.158, "dur": 71.231, + "args": { + "External id": 295079,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 1702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368135534.524, "dur": 68.504, + "args": { + "External id": 295080,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 1703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368135539.157, "dur": 7.473, + "args": { + "External id": 295081,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368135548.615, "dur": 53.821, + "args": { + "External id": 295082,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 1705 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.19", "pid": 2070552, "tid": 2107648, + "ts": 5333368135805.834, "dur": 584.406, + "args": { + "External id": 295083,"Record function id": 0, "Ev Idx": 1706 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.19)", "pid": 2070552, "tid": 2107648, + "ts": 5333368135823.135, "dur": 554.603, + "args": { + "External id": 295084,"Record function id": 0, "Ev Idx": 1707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368135885.057, "dur": 7.813, + "args": { + "External id": 295085,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368135909.344, "dur": 32.248, + "args": { + "External id": 295086,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368135914.460, "dur": 1.680, + "args": { + "External id": 295087,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368135918.151, "dur": 1.535, + "args": { + "External id": 295088,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368135921.298, "dur": 0.718, + "args": { + "External id": 295089,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368135923.047, "dur": 0.515, + "args": { + "External id": 295090,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368135925.956, "dur": 0.566, + "args": { + "External id": 295091,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368135927.489, "dur": 0.383, + "args": { + "External id": 295092,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368135929.018, "dur": 0.855, + "args": { + "External id": 295093,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368135931.885, "dur": 2.327, + "args": { + "External id": 295094,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368135935.908, "dur": 0.638, + "args": { + "External id": 295095,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368135951.833, "dur": 32.630, + "args": { + "External id": 295096,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5333368136013.319, "dur": 101.368, + "args": { + "External id": 295097,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 1720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368136023.155, "dur": 3.404, + "args": { + "External id": 295098,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5333368136031.294, "dur": 10.040, + "args": { + "External id": 295099,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5333368136035.460, "dur": 5.481, + "args": { + "External id": 295100,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 1723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368136039.154, "dur": 0.507, + "args": { + "External id": 295101,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368136049.418, "dur": 23.058, + "args": { + "External id": 295102,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368136050.885, "dur": 0.329, + "args": { + "External id": 295103,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368136052.753, "dur": 0.590, + "args": { + "External id": 295104,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368136054.732, "dur": 0.428, + "args": { + "External id": 295105,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368136056.225, "dur": 1.182, + "args": { + "External id": 295106,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368136058.550, "dur": 2.202, + "args": { + "External id": 295107,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368136061.800, "dur": 0.586, + "args": { + "External id": 295108,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368136064.958, "dur": 0.377, + "args": { + "External id": 295109,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368136066.566, "dur": 0.526, + "args": { + "External id": 295110,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368136068.491, "dur": 0.531, + "args": { + "External id": 295111,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368136087.805, "dur": 19.845, + "args": { + "External id": 295112,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368136157.313, "dur": 147.885, + "args": { + "External id": 295113,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 1736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368136206.088, "dur": 95.037, + "args": { + "External id": 295114,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1737, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368136218.502, "dur": 78.277, + "args": { + "External id": 295115,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 1738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368136321.123, "dur": 2.045, + "args": { + "External id": 295116,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1739, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368136404.808, "dur": 1728.664, + "args": { + "External id": 295117,"Sequence number": 1209202, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1740 + } + }, + { + "ph": "f", "id": 30, "pid": 2070552, "tid": 2107648, "ts": 5333368136404.808, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368136520.331, "dur": 150.825, + "args": { + "External id": 295118,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 1741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368136716.771, "dur": 42.880, + "args": { + "External id": 295119,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 1742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5333368136777.600, "dur": 57.058, + "args": { + "External id": 295120,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 1743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368136844.252, "dur": 33.492, + "args": { + "External id": 295121,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368136886.482, "dur": 49.324, + "args": { + "External id": 295122,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368136942.639, "dur": 31.079, + "args": { + "External id": 295123,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368136981.720, "dur": 42.641, + "args": { + "External id": 295124,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368137047.042, "dur": 23.257, + "args": { + "External id": 295125,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 1748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368137087.728, "dur": 27.915, + "args": { + "External id": 295126,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368137142.927, "dur": 19.170, + "args": { + "External id": 295127,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368137201.688, "dur": 22.367, + "args": { + "External id": 295128,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368137239.913, "dur": 41.665, + "args": { + "External id": 295129,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368137286.335, "dur": 34.466, + "args": { + "External id": 295130,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5333368137361.431, "dur": 171.639, + "args": { + "External id": 295131,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368137440.094, "dur": 6.384, + "args": { + "External id": 295132,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368137448.439, "dur": 4.006, + "args": { + "External id": 295133,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368137567.768, "dur": 26.094, + "args": { + "External id": 295134,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368137604.208, "dur": 13.084, + "args": { + "External id": 295135,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368137663.870, "dur": 63.579, + "args": { + "External id": 295136,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368137742.252, "dur": 41.345, + "args": { + "External id": 295137,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368137792.830, "dur": 20.701, + "args": { + "External id": 295138,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368137819.628, "dur": 31.454, + "args": { + "External id": 295139,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368137856.458, "dur": 21.520, + "args": { + "External id": 295140,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368137885.440, "dur": 33.652, + "args": { + "External id": 295141,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5333368137940.790, "dur": 24.055, + "args": { + "External id": 295142,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 1765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368137983.387, "dur": 24.150, + "args": { + "External id": 295143,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368138023.868, "dur": 16.392, + "args": { + "External id": 295144,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368138057.410, "dur": 13.796, + "args": { + "External id": 295145,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5333368138088.013, "dur": 14.708, + "args": { + "External id": 295146,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 1769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368138200.782, "dur": 19.960, + "args": { + "External id": 295147,"Record function id": 0, "Ev Idx": 1770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368138205.811, "dur": 13.328, + "args": { + "External id": 295148,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368138210.761, "dur": 6.823, + "args": { + "External id": 295149,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368138212.334, "dur": 4.877, + "args": { + "External id": 295150,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368138227.642, "dur": 6.681, + "args": { + "External id": 295151,"Record function id": 0, "Ev Idx": 1774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368138229.720, "dur": 3.914, + "args": { + "External id": 295152,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368138230.774, "dur": 2.129, + "args": { + "External id": 295153,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368138231.473, "dur": 1.267, + "args": { + "External id": 295154,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368138242.809, "dur": 6.344, + "args": { + "External id": 295155,"Record function id": 0, "Ev Idx": 1778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368138245.098, "dur": 3.380, + "args": { + "External id": 295156,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368138246.073, "dur": 1.801, + "args": { + "External id": 295157,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368138246.723, "dur": 1.033, + "args": { + "External id": 295158,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368138254.333, "dur": 5.758, + "args": { + "External id": 295159,"Record function id": 0, "Ev Idx": 1782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368138256.278, "dur": 3.075, + "args": { + "External id": 295160,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368138257.172, "dur": 1.593, + "args": { + "External id": 295161,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368138257.711, "dur": 0.941, + "args": { + "External id": 295162,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368138264.975, "dur": 6.336, + "args": { + "External id": 295163,"Record function id": 0, "Ev Idx": 1786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368138266.585, "dur": 4.273, + "args": { + "External id": 295164,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368138267.165, "dur": 3.202, + "args": { + "External id": 295165,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368138267.562, "dur": 2.671, + "args": { + "External id": 295166,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368138275.195, "dur": 4.847, + "args": { + "External id": 295167,"Record function id": 0, "Ev Idx": 1790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368138276.519, "dur": 3.118, + "args": { + "External id": 295168,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368138277.168, "dur": 2.017, + "args": { + "External id": 295169,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368138278.123, "dur": 0.998, + "args": { + "External id": 295170,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368138283.393, "dur": 4.038, + "args": { + "External id": 295171,"Record function id": 0, "Ev Idx": 1794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368138284.506, "dur": 2.509, + "args": { + "External id": 295172,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368138285.189, "dur": 1.417, + "args": { + "External id": 295173,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368138285.930, "dur": 0.605, + "args": { + "External id": 295174,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368138290.935, "dur": 3.812, + "args": { + "External id": 295175,"Record function id": 0, "Ev Idx": 1798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368138292.177, "dur": 2.155, + "args": { + "External id": 295176,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368138292.613, "dur": 1.247, + "args": { + "External id": 295177,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368138293.149, "dur": 0.648, + "args": { + "External id": 295178,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368138301.763, "dur": 3.863, + "args": { + "External id": 295179,"Record function id": 0, "Ev Idx": 1802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368138303.022, "dur": 2.204, + "args": { + "External id": 295180,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368138303.432, "dur": 1.426, + "args": { + "External id": 295181,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368138304.140, "dur": 0.646, + "args": { + "External id": 295182,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368138309.630, "dur": 35794.651, + "args": { + "External id": 295183,"Record function id": 0, "Sequence number": 1209201, "Fwd thread id": 1, "Ev Idx": 1806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368138311.109, "dur": 35784.077, + "args": { + "External id": 295184,"Sequence number": 1209201, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1807 + } + }, + { + "ph": "f", "id": 31, "pid": 2070552, "tid": 2107648, "ts": 5333368138311.109, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.20)", "pid": 2070552, "tid": 2107648, + "ts": 5333368138342.507, "dur": 42.692, + "args": { + "External id": 295185,"Record function id": 0, "Ev Idx": 1808 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.20)", "pid": 2070552, "tid": 2107648, + "ts": 5333368138392.803, "dur": 70.157, + "args": { + "External id": 295186,"Record function id": 0, "Ev Idx": 1809 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.20)", "pid": 2070552, "tid": 2107648, + "ts": 5333368138469.199, "dur": 35618.500, + "args": { + "External id": 295187,"Record function id": 0, "Ev Idx": 1810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368138562.480, "dur": 7.946, + "args": { + "External id": 295188,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368138583.134, "dur": 4.418, + "args": { + "External id": 295189,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368138601.983, "dur": 34655.451, + "args": { + "External id": 295190,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368138616.060, "dur": 34629.707, + "args": { + "External id": 295191,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368138700.350, "dur": 19.444, + "args": { + "External id": 295192,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368138725.901, "dur": 34476.038, + "args": { + "External id": 295193,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 1816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368138728.141, "dur": 34472.900, + "args": { + "External id": 295194,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 1817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368138731.534, "dur": 8.521, + "args": { + "External id": 295195,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368138741.706, "dur": 34454.999, + "args": { + "External id": 295196,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 1819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368173355.705, "dur": 10.884, + "args": { + "External id": 295197,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 1820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368173358.979, "dur": 7.138, + "args": { + "External id": 295198,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368173398.917, "dur": 389.472, + "args": { + "External id": 295199,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 1822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368173424.170, "dur": 359.171, + "args": { + "External id": 295200,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1823, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368173435.680, "dur": 341.766, + "args": { + "External id": 295201,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 1824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368173809.686, "dur": 2.471, + "args": { + "External id": 295202,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1825, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368173876.368, "dur": 7.116, + "args": { + "External id": 295203,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368173927.193, "dur": 1.212, + "args": { + "External id": 295204,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368173945.413, "dur": 1.449, + "args": { + "External id": 295205,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368173958.829, "dur": 3.298, + "args": { + "External id": 295206,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368173973.508, "dur": 0.846, + "args": { + "External id": 295207,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368173984.788, "dur": 0.720, + "args": { + "External id": 295208,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368173995.784, "dur": 0.868, + "args": { + "External id": 295209,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368174008.439, "dur": 3.987, + "args": { + "External id": 295210,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368174022.868, "dur": 0.988, + "args": { + "External id": 295211,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368174119.186, "dur": 2898.450, + "args": { + "External id": 295212,"Record function id": 0, "Ev Idx": 1835 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.19)", "pid": 2070552, "tid": 2107648, + "ts": 5333368174139.851, "dur": 1114.719, + "args": { + "External id": 295213,"Record function id": 0, "Ev Idx": 1836 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.19)", "pid": 2070552, "tid": 2107648, + "ts": 5333368174154.264, "dur": 364.212, + "args": { + "External id": 295214,"Record function id": 0, "Ev Idx": 1837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368174278.166, "dur": 5.202, + "args": { + "External id": 295215,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368174286.755, "dur": 1.478, + "args": { + "External id": 295216,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368174290.270, "dur": 1.390, + "args": { + "External id": 295217,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368174293.204, "dur": 1.103, + "args": { + "External id": 295218,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368174296.688, "dur": 2.961, + "args": { + "External id": 295219,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368174301.416, "dur": 1.297, + "args": { + "External id": 295220,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368174304.307, "dur": 1.733, + "args": { + "External id": 295221,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368174308.006, "dur": 0.798, + "args": { + "External id": 295222,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368174310.819, "dur": 0.646, + "args": { + "External id": 295223,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368174312.814, "dur": 1.421, + "args": { + "External id": 295224,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368174333.291, "dur": 155.247, + "args": { + "External id": 295225,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368174349.757, "dur": 134.221, + "args": { + "External id": 295226,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368174366.169, "dur": 12.993, + "args": { + "External id": 295227,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368174383.410, "dur": 72.168, + "args": { + "External id": 295228,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 1851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368174386.161, "dur": 69.136, + "args": { + "External id": 295229,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 1852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368174389.639, "dur": 7.215, + "args": { + "External id": 295230,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368174398.440, "dur": 56.262, + "args": { + "External id": 295231,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 1854 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.18", "pid": 2070552, "tid": 2107648, + "ts": 5333368174609.114, "dur": 634.792, + "args": { + "External id": 295232,"Record function id": 0, "Ev Idx": 1855 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.18)", "pid": 2070552, "tid": 2107648, + "ts": 5333368174668.411, "dur": 557.798, + "args": { + "External id": 295233,"Record function id": 0, "Ev Idx": 1856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368174737.001, "dur": 7.972, + "args": { + "External id": 295234,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368174762.970, "dur": 31.827, + "args": { + "External id": 295235,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368174767.812, "dur": 1.591, + "args": { + "External id": 295236,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368174771.646, "dur": 1.252, + "args": { + "External id": 295237,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368174774.495, "dur": 0.410, + "args": { + "External id": 295238,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368174776.131, "dur": 0.621, + "args": { + "External id": 295239,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368174779.213, "dur": 0.334, + "args": { + "External id": 295240,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368174780.790, "dur": 0.452, + "args": { + "External id": 295241,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368174782.817, "dur": 0.497, + "args": { + "External id": 295242,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368174785.335, "dur": 2.800, + "args": { + "External id": 295243,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368174789.749, "dur": 0.907, + "args": { + "External id": 295244,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368174805.001, "dur": 36.053, + "args": { + "External id": 295245,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5333368174871.196, "dur": 96.171, + "args": { + "External id": 295246,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 1869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368174880.912, "dur": 4.062, + "args": { + "External id": 295247,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5333368174889.776, "dur": 10.298, + "args": { + "External id": 295248,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5333368174894.112, "dur": 5.537, + "args": { + "External id": 295249,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 1872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368174897.481, "dur": 0.962, + "args": { + "External id": 295250,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 1873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368174906.776, "dur": 24.563, + "args": { + "External id": 295251,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 1874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368174908.612, "dur": 0.562, + "args": { + "External id": 295252,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368174910.945, "dur": 0.353, + "args": { + "External id": 295253,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368174912.603, "dur": 0.332, + "args": { + "External id": 295254,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368174914.227, "dur": 1.212, + "args": { + "External id": 295255,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368174916.766, "dur": 2.306, + "args": { + "External id": 295256,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368174920.166, "dur": 0.348, + "args": { + "External id": 295257,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368174923.910, "dur": 0.398, + "args": { + "External id": 295258,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368174925.150, "dur": 0.324, + "args": { + "External id": 295259,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368174926.902, "dur": 0.367, + "args": { + "External id": 295260,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368174941.564, "dur": 18.306, + "args": { + "External id": 295261,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 1884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368175012.878, "dur": 119.370, + "args": { + "External id": 295262,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 1885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368175038.323, "dur": 90.726, + "args": { + "External id": 295263,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1886, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368175047.729, "dur": 76.977, + "args": { + "External id": 295264,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 1887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368175146.938, "dur": 1.883, + "args": { + "External id": 295265,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1888, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368175263.468, "dur": 1733.318, + "args": { + "External id": 295266,"Sequence number": 1209200, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1889 + } + }, + { + "ph": "f", "id": 32, "pid": 2070552, "tid": 2107648, "ts": 5333368175263.468, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368175379.152, "dur": 119.093, + "args": { + "External id": 295267,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 1890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368175543.904, "dur": 39.653, + "args": { + "External id": 295268,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 1891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5333368175609.800, "dur": 99.652, + "args": { + "External id": 295269,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 1892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368175725.913, "dur": 34.634, + "args": { + "External id": 295270,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368175769.384, "dur": 46.540, + "args": { + "External id": 295271,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368175822.227, "dur": 28.064, + "args": { + "External id": 295272,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 1895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368175857.151, "dur": 42.479, + "args": { + "External id": 295273,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 1896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368175925.892, "dur": 24.678, + "args": { + "External id": 295274,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 1897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368175970.367, "dur": 27.918, + "args": { + "External id": 295275,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368176019.266, "dur": 19.289, + "args": { + "External id": 295276,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368176051.072, "dur": 15.599, + "args": { + "External id": 295277,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368176075.701, "dur": 27.753, + "args": { + "External id": 295278,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368176106.706, "dur": 31.875, + "args": { + "External id": 295279,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5333368176185.663, "dur": 187.668, + "args": { + "External id": 295280,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 1903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368176276.288, "dur": 7.192, + "args": { + "External id": 295281,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368176285.996, "dur": 3.421, + "args": { + "External id": 295282,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368176407.510, "dur": 26.030, + "args": { + "External id": 295283,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368176445.443, "dur": 13.363, + "args": { + "External id": 295284,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368176467.341, "dur": 42.101, + "args": { + "External id": 295285,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368176520.129, "dur": 51.773, + "args": { + "External id": 295286,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368176585.586, "dur": 26.624, + "args": { + "External id": 295287,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368176618.793, "dur": 73.737, + "args": { + "External id": 295288,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368176701.908, "dur": 22.519, + "args": { + "External id": 295289,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 1912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368176735.685, "dur": 31.235, + "args": { + "External id": 295290,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 1913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5333368176788.749, "dur": 28.729, + "args": { + "External id": 295291,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 1914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368176837.408, "dur": 31.440, + "args": { + "External id": 295292,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 1915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368176885.196, "dur": 17.369, + "args": { + "External id": 295293,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 1916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368176918.450, "dur": 15.529, + "args": { + "External id": 295294,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 1917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5333368176947.439, "dur": 19.966, + "args": { + "External id": 295295,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 1918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368177042.984, "dur": 15.429, + "args": { + "External id": 295296,"Record function id": 0, "Ev Idx": 1919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368177046.021, "dur": 11.445, + "args": { + "External id": 295297,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368177050.595, "dur": 5.963, + "args": { + "External id": 295298,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368177052.046, "dur": 4.427, + "args": { + "External id": 295299,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368177062.225, "dur": 4.744, + "args": { + "External id": 295300,"Record function id": 0, "Ev Idx": 1923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368177063.389, "dur": 3.096, + "args": { + "External id": 295301,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368177064.366, "dur": 1.583, + "args": { + "External id": 295302,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368177064.843, "dur": 0.983, + "args": { + "External id": 295303,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368177070.306, "dur": 3.756, + "args": { + "External id": 295304,"Record function id": 0, "Ev Idx": 1927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368177071.229, "dur": 2.432, + "args": { + "External id": 295305,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368177071.698, "dur": 1.571, + "args": { + "External id": 295306,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368177072.269, "dur": 0.915, + "args": { + "External id": 295307,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 1930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368177077.260, "dur": 4.470, + "args": { + "External id": 295308,"Record function id": 0, "Ev Idx": 1931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368177078.383, "dur": 2.936, + "args": { + "External id": 295309,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368177079.041, "dur": 1.731, + "args": { + "External id": 295310,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368177080.044, "dur": 0.660, + "args": { + "External id": 295311,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 1934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368177084.964, "dur": 6.378, + "args": { + "External id": 295312,"Record function id": 0, "Ev Idx": 1935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368177086.340, "dur": 4.598, + "args": { + "External id": 295313,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368177086.839, "dur": 3.710, + "args": { + "External id": 295314,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368177087.607, "dur": 2.879, + "args": { + "External id": 295315,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368177094.488, "dur": 4.324, + "args": { + "External id": 295316,"Record function id": 0, "Ev Idx": 1939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368177095.616, "dur": 2.773, + "args": { + "External id": 295317,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368177096.236, "dur": 1.593, + "args": { + "External id": 295318,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368177096.892, "dur": 0.833, + "args": { + "External id": 295319,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368177102.354, "dur": 3.684, + "args": { + "External id": 295320,"Record function id": 0, "Ev Idx": 1943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368177103.244, "dur": 2.385, + "args": { + "External id": 295321,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368177103.695, "dur": 1.301, + "args": { + "External id": 295322,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368177104.072, "dur": 0.849, + "args": { + "External id": 295323,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368177109.597, "dur": 3.471, + "args": { + "External id": 295324,"Record function id": 0, "Ev Idx": 1947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368177110.531, "dur": 2.131, + "args": { + "External id": 295325,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368177111.006, "dur": 1.249, + "args": { + "External id": 295326,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368177111.573, "dur": 0.604, + "args": { + "External id": 295327,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 1950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368177116.147, "dur": 4.186, + "args": { + "External id": 295328,"Record function id": 0, "Ev Idx": 1951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368177117.044, "dur": 2.882, + "args": { + "External id": 295329,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368177117.506, "dur": 1.856, + "args": { + "External id": 295330,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368177118.614, "dur": 0.675, + "args": { + "External id": 295331,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 1954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368177124.623, "dur": 35940.284, + "args": { + "External id": 295332,"Record function id": 0, "Sequence number": 1209199, "Fwd thread id": 1, "Ev Idx": 1955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368177126.144, "dur": 35929.908, + "args": { + "External id": 295333,"Sequence number": 1209199, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 1956 + } + }, + { + "ph": "f", "id": 33, "pid": 2070552, "tid": 2107648, "ts": 5333368177126.144, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.19)", "pid": 2070552, "tid": 2107648, + "ts": 5333368177154.891, "dur": 68.482, + "args": { + "External id": 295334,"Record function id": 0, "Ev Idx": 1957 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.19)", "pid": 2070552, "tid": 2107648, + "ts": 5333368177235.127, "dur": 85.031, + "args": { + "External id": 295335,"Record function id": 0, "Ev Idx": 1958 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.19)", "pid": 2070552, "tid": 2107648, + "ts": 5333368177327.481, "dur": 35721.369, + "args": { + "External id": 295336,"Record function id": 0, "Ev Idx": 1959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368177423.657, "dur": 7.370, + "args": { + "External id": 295337,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368177442.134, "dur": 5.370, + "args": { + "External id": 295338,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368177461.543, "dur": 34731.441, + "args": { + "External id": 295339,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368177475.512, "dur": 34704.634, + "args": { + "External id": 295340,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 1963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368177515.716, "dur": 14.768, + "args": { + "External id": 295341,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368177536.612, "dur": 34587.797, + "args": { + "External id": 295342,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 1965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368177540.594, "dur": 34582.838, + "args": { + "External id": 295343,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 1966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368177544.034, "dur": 7.392, + "args": { + "External id": 295344,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368177553.025, "dur": 34565.963, + "args": { + "External id": 295345,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 1968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368212295.960, "dur": 10.881, + "args": { + "External id": 295346,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 1969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368212299.216, "dur": 7.150, + "args": { + "External id": 295347,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368212338.384, "dur": 415.513, + "args": { + "External id": 295348,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 1971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368212368.424, "dur": 380.133, + "args": { + "External id": 295349,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 1972, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368212380.711, "dur": 361.263, + "args": { + "External id": 295350,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 1973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368212777.053, "dur": 2.282, + "args": { + "External id": 295351,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 1974, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368212843.030, "dur": 7.180, + "args": { + "External id": 295352,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368212894.514, "dur": 1.193, + "args": { + "External id": 295353,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368212910.374, "dur": 1.407, + "args": { + "External id": 295354,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368212923.870, "dur": 3.052, + "args": { + "External id": 295355,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368212938.391, "dur": 0.840, + "args": { + "External id": 295356,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368212949.007, "dur": 0.924, + "args": { + "External id": 295357,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368212961.508, "dur": 1.021, + "args": { + "External id": 295358,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368212973.878, "dur": 4.434, + "args": { + "External id": 295359,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368212988.491, "dur": 0.761, + "args": { + "External id": 295360,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 1983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368213079.475, "dur": 2932.439, + "args": { + "External id": 295361,"Record function id": 0, "Ev Idx": 1984 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.18)", "pid": 2070552, "tid": 2107648, + "ts": 5333368213099.340, "dur": 1136.172, + "args": { + "External id": 295362,"Record function id": 0, "Ev Idx": 1985 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.18)", "pid": 2070552, "tid": 2107648, + "ts": 5333368213114.433, "dur": 381.816, + "args": { + "External id": 295363,"Record function id": 0, "Ev Idx": 1986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368213227.891, "dur": 5.592, + "args": { + "External id": 295364,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 1987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368213238.158, "dur": 1.391, + "args": { + "External id": 295365,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368213242.848, "dur": 1.672, + "args": { + "External id": 295366,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368213247.665, "dur": 1.092, + "args": { + "External id": 295367,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368213251.230, "dur": 3.227, + "args": { + "External id": 295368,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368213256.881, "dur": 1.369, + "args": { + "External id": 295369,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 1992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368213260.776, "dur": 2.422, + "args": { + "External id": 295370,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 1993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368213265.449, "dur": 1.202, + "args": { + "External id": 295371,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368213269.308, "dur": 1.261, + "args": { + "External id": 295372,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368213272.926, "dur": 0.822, + "args": { + "External id": 295373,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 1996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368213297.694, "dur": 164.755, + "args": { + "External id": 295374,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368213317.636, "dur": 139.325, + "args": { + "External id": 295375,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 1998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368213334.479, "dur": 16.125, + "args": { + "External id": 295376,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 1999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368213354.792, "dur": 71.796, + "args": { + "External id": 295377,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 2000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368213358.993, "dur": 67.255, + "args": { + "External id": 295378,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 2001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368213362.603, "dur": 6.235, + "args": { + "External id": 295379,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368213370.517, "dur": 55.196, + "args": { + "External id": 295380,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 2003 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.17", "pid": 2070552, "tid": 2107648, + "ts": 5333368213589.626, "dur": 636.357, + "args": { + "External id": 295381,"Record function id": 0, "Ev Idx": 2004 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.17)", "pid": 2070552, "tid": 2107648, + "ts": 5333368213608.642, "dur": 600.191, + "args": { + "External id": 295382,"Record function id": 0, "Ev Idx": 2005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368213718.188, "dur": 8.763, + "args": { + "External id": 295383,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368213744.047, "dur": 33.822, + "args": { + "External id": 295384,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368213748.863, "dur": 1.734, + "args": { + "External id": 295385,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368213754.570, "dur": 0.740, + "args": { + "External id": 295386,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368213756.450, "dur": 0.481, + "args": { + "External id": 295387,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368213758.463, "dur": 0.699, + "args": { + "External id": 295388,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368213761.717, "dur": 0.440, + "args": { + "External id": 295389,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368213763.805, "dur": 0.663, + "args": { + "External id": 295390,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368213765.964, "dur": 1.174, + "args": { + "External id": 295391,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368213768.393, "dur": 2.783, + "args": { + "External id": 295392,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368213771.988, "dur": 0.661, + "args": { + "External id": 295393,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368213788.005, "dur": 34.617, + "args": { + "External id": 295394,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5333368213857.134, "dur": 100.357, + "args": { + "External id": 295395,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 2018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368213867.123, "dur": 3.810, + "args": { + "External id": 295396,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5333368213876.049, "dur": 9.688, + "args": { + "External id": 295397,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5333368213880.218, "dur": 5.072, + "args": { + "External id": 295398,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 2021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368213883.299, "dur": 0.694, + "args": { + "External id": 295399,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368213892.308, "dur": 27.693, + "args": { + "External id": 295400,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368213893.987, "dur": 0.608, + "args": { + "External id": 295401,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368213896.201, "dur": 1.189, + "args": { + "External id": 295402,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368213899.266, "dur": 0.550, + "args": { + "External id": 295403,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368213901.032, "dur": 0.385, + "args": { + "External id": 295404,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368213904.546, "dur": 2.384, + "args": { + "External id": 295405,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368213907.710, "dur": 0.594, + "args": { + "External id": 295406,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368213910.191, "dur": 0.370, + "args": { + "External id": 295407,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368213913.540, "dur": 0.341, + "args": { + "External id": 295408,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368213915.606, "dur": 0.336, + "args": { + "External id": 295409,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368213931.139, "dur": 18.685, + "args": { + "External id": 295410,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368214001.468, "dur": 118.320, + "args": { + "External id": 295411,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 2034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368214026.959, "dur": 89.501, + "args": { + "External id": 295412,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2035, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368214036.387, "dur": 74.657, + "args": { + "External id": 295413,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 2036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368214134.182, "dur": 1.785, + "args": { + "External id": 295414,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2037, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368214244.845, "dur": 1746.029, + "args": { + "External id": 295415,"Sequence number": 1209198, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2038 + } + }, + { + "ph": "f", "id": 34, "pid": 2070552, "tid": 2107648, "ts": 5333368214244.845, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368214360.480, "dur": 114.248, + "args": { + "External id": 295416,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 2039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368214517.167, "dur": 42.137, + "args": { + "External id": 295417,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 2040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5333368214576.243, "dur": 94.152, + "args": { + "External id": 295418,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 2041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368214685.806, "dur": 41.727, + "args": { + "External id": 295419,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368214736.349, "dur": 46.583, + "args": { + "External id": 295420,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368214789.961, "dur": 27.563, + "args": { + "External id": 295421,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368214825.915, "dur": 43.105, + "args": { + "External id": 295422,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368214895.544, "dur": 23.163, + "args": { + "External id": 295423,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 2046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368214937.229, "dur": 32.056, + "args": { + "External id": 295424,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368215009.248, "dur": 18.184, + "args": { + "External id": 295425,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368215042.335, "dur": 15.383, + "args": { + "External id": 295426,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368215069.065, "dur": 29.852, + "args": { + "External id": 295427,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368215102.010, "dur": 36.401, + "args": { + "External id": 295428,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5333368215190.769, "dur": 190.207, + "args": { + "External id": 295429,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368215283.074, "dur": 8.647, + "args": { + "External id": 295430,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368215293.750, "dur": 2.282, + "args": { + "External id": 295431,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368215417.391, "dur": 28.256, + "args": { + "External id": 295432,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368215457.365, "dur": 13.002, + "args": { + "External id": 295433,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368215479.424, "dur": 58.826, + "args": { + "External id": 295434,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368215549.946, "dur": 43.070, + "args": { + "External id": 295435,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368215601.962, "dur": 60.469, + "args": { + "External id": 295436,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368215672.242, "dur": 37.358, + "args": { + "External id": 295437,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368215716.110, "dur": 20.102, + "args": { + "External id": 295438,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368215742.886, "dur": 30.088, + "args": { + "External id": 295439,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5333368215794.525, "dur": 25.659, + "args": { + "External id": 295440,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 2063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368215838.063, "dur": 26.710, + "args": { + "External id": 295441,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368215879.258, "dur": 21.731, + "args": { + "External id": 295442,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368215915.430, "dur": 14.319, + "args": { + "External id": 295443,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5333368215947.071, "dur": 14.416, + "args": { + "External id": 295444,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 2067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368216034.718, "dur": 15.514, + "args": { + "External id": 295445,"Record function id": 0, "Ev Idx": 2068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368216038.108, "dur": 11.029, + "args": { + "External id": 295446,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368216042.564, "dur": 5.802, + "args": { + "External id": 295447,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368216043.740, "dur": 4.491, + "args": { + "External id": 295448,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368216053.895, "dur": 4.903, + "args": { + "External id": 295449,"Record function id": 0, "Ev Idx": 2072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368216055.399, "dur": 2.952, + "args": { + "External id": 295450,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368216056.099, "dur": 1.576, + "args": { + "External id": 295451,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368216056.585, "dur": 1.000, + "args": { + "External id": 295452,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368216061.986, "dur": 4.124, + "args": { + "External id": 295453,"Record function id": 0, "Ev Idx": 2076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368216063.322, "dur": 2.376, + "args": { + "External id": 295454,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368216064.198, "dur": 1.089, + "args": { + "External id": 295455,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368216064.573, "dur": 0.625, + "args": { + "External id": 295456,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368216069.305, "dur": 6.512, + "args": { + "External id": 295457,"Record function id": 0, "Ev Idx": 2080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368216070.354, "dur": 5.033, + "args": { + "External id": 295458,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368216071.031, "dur": 3.731, + "args": { + "External id": 295459,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368216071.609, "dur": 3.080, + "args": { + "External id": 295460,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368216078.936, "dur": 5.114, + "args": { + "External id": 295461,"Record function id": 0, "Ev Idx": 2084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368216080.725, "dur": 2.919, + "args": { + "External id": 295462,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368216081.207, "dur": 1.717, + "args": { + "External id": 295463,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368216081.989, "dur": 0.862, + "args": { + "External id": 295464,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368216087.121, "dur": 4.528, + "args": { + "External id": 295465,"Record function id": 0, "Ev Idx": 2088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368216088.542, "dur": 2.721, + "args": { + "External id": 295466,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368216089.278, "dur": 1.503, + "args": { + "External id": 295467,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368216089.859, "dur": 0.846, + "args": { + "External id": 295468,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368216094.734, "dur": 4.250, + "args": { + "External id": 295469,"Record function id": 0, "Ev Idx": 2092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368216095.932, "dur": 2.644, + "args": { + "External id": 295470,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368216096.700, "dur": 1.244, + "args": { + "External id": 295471,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368216097.245, "dur": 0.625, + "args": { + "External id": 295472,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368216101.984, "dur": 4.413, + "args": { + "External id": 295473,"Record function id": 0, "Ev Idx": 2096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368216103.100, "dur": 2.883, + "args": { + "External id": 295474,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368216103.742, "dur": 1.685, + "args": { + "External id": 295475,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368216104.507, "dur": 0.844, + "args": { + "External id": 295476,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368216109.809, "dur": 4.306, + "args": { + "External id": 295477,"Record function id": 0, "Ev Idx": 2100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368216111.156, "dur": 2.532, + "args": { + "External id": 295478,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368216111.927, "dur": 1.223, + "args": { + "External id": 295479,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368216112.459, "dur": 0.617, + "args": { + "External id": 295480,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368216118.270, "dur": 35971.426, + "args": { + "External id": 295481,"Record function id": 0, "Sequence number": 1209197, "Fwd thread id": 1, "Ev Idx": 2104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368216119.609, "dur": 35960.154, + "args": { + "External id": 295482,"Sequence number": 1209197, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2105 + } + }, + { + "ph": "f", "id": 35, "pid": 2070552, "tid": 2107648, "ts": 5333368216119.609, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.18)", "pid": 2070552, "tid": 2107648, + "ts": 5333368216149.015, "dur": 63.069, + "args": { + "External id": 295483,"Record function id": 0, "Ev Idx": 2106 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.18)", "pid": 2070552, "tid": 2107648, + "ts": 5333368216223.962, "dur": 76.413, + "args": { + "External id": 295484,"Record function id": 0, "Ev Idx": 2107 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.18)", "pid": 2070552, "tid": 2107648, + "ts": 5333368216306.333, "dur": 35765.738, + "args": { + "External id": 295485,"Record function id": 0, "Ev Idx": 2108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368216402.922, "dur": 7.874, + "args": { + "External id": 295486,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368216421.075, "dur": 5.154, + "args": { + "External id": 295487,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368216440.982, "dur": 34814.274, + "args": { + "External id": 295488,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368216455.765, "dur": 34787.059, + "args": { + "External id": 295489,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368216496.622, "dur": 16.753, + "args": { + "External id": 295490,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368216519.622, "dur": 34672.321, + "args": { + "External id": 295491,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 2114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368216522.187, "dur": 34668.546, + "args": { + "External id": 295492,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 2115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368216525.766, "dur": 5.809, + "args": { + "External id": 295493,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368216533.210, "dur": 34652.712, + "args": { + "External id": 295494,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 2117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368251362.276, "dur": 12.426, + "args": { + "External id": 295495,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 2118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368251365.738, "dur": 8.590, + "args": { + "External id": 295496,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368251405.611, "dur": 375.295, + "args": { + "External id": 295497,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 2120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368251433.832, "dur": 341.771, + "args": { + "External id": 295498,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2121, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368251446.323, "dur": 323.220, + "args": { + "External id": 295499,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 2122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368251802.840, "dur": 2.256, + "args": { + "External id": 295500,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2123, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368251867.876, "dur": 6.739, + "args": { + "External id": 295501,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368251918.919, "dur": 1.448, + "args": { + "External id": 295502,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368251935.561, "dur": 1.468, + "args": { + "External id": 295503,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368251950.263, "dur": 0.860, + "args": { + "External id": 295504,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368251962.576, "dur": 0.999, + "args": { + "External id": 295505,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368251973.429, "dur": 0.726, + "args": { + "External id": 295506,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368251984.928, "dur": 1.160, + "args": { + "External id": 295507,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368251998.654, "dur": 2.082, + "args": { + "External id": 295508,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368252010.584, "dur": 0.768, + "args": { + "External id": 295509,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368252103.967, "dur": 2886.220, + "args": { + "External id": 295510,"Record function id": 0, "Ev Idx": 2133 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.17)", "pid": 2070552, "tid": 2107648, + "ts": 5333368252124.487, "dur": 1084.561, + "args": { + "External id": 295511,"Record function id": 0, "Ev Idx": 2134 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.17)", "pid": 2070552, "tid": 2107648, + "ts": 5333368252140.531, "dur": 353.077, + "args": { + "External id": 295512,"Record function id": 0, "Ev Idx": 2135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368252258.355, "dur": 5.012, + "args": { + "External id": 295513,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368252266.829, "dur": 1.119, + "args": { + "External id": 295514,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368252269.581, "dur": 1.284, + "args": { + "External id": 295515,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368252272.469, "dur": 2.975, + "args": { + "External id": 295516,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368252276.856, "dur": 0.996, + "args": { + "External id": 295517,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368252279.282, "dur": 1.293, + "args": { + "External id": 295518,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368252282.189, "dur": 1.926, + "args": { + "External id": 295519,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368252285.677, "dur": 0.779, + "args": { + "External id": 295520,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368252288.021, "dur": 0.755, + "args": { + "External id": 295521,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368252290.485, "dur": 0.684, + "args": { + "External id": 295522,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368252310.359, "dur": 150.973, + "args": { + "External id": 295523,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368252327.105, "dur": 129.285, + "args": { + "External id": 295524,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368252341.290, "dur": 12.869, + "args": { + "External id": 295525,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368252358.892, "dur": 70.614, + "args": { + "External id": 295526,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 2149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368252361.451, "dur": 67.648, + "args": { + "External id": 295527,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 2150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368252364.957, "dur": 7.728, + "args": { + "External id": 295528,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368252374.239, "dur": 54.324, + "args": { + "External id": 295529,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 2152 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.16", "pid": 2070552, "tid": 2107648, + "ts": 5333368252585.492, "dur": 612.035, + "args": { + "External id": 295530,"Record function id": 0, "Ev Idx": 2153 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.16)", "pid": 2070552, "tid": 2107648, + "ts": 5333368252602.083, "dur": 558.535, + "args": { + "External id": 295531,"Record function id": 0, "Ev Idx": 2154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368252705.857, "dur": 6.400, + "args": { + "External id": 295532,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368252729.476, "dur": 32.326, + "args": { + "External id": 295533,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368252734.688, "dur": 2.836, + "args": { + "External id": 295534,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368252739.695, "dur": 0.238, + "args": { + "External id": 295535,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368252741.966, "dur": 0.410, + "args": { + "External id": 295536,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368252744.958, "dur": 0.626, + "args": { + "External id": 295537,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368252746.905, "dur": 0.599, + "args": { + "External id": 295538,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368252748.569, "dur": 0.388, + "args": { + "External id": 295539,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368252751.083, "dur": 2.108, + "args": { + "External id": 295540,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368252754.158, "dur": 0.520, + "args": { + "External id": 295541,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368252756.006, "dur": 1.269, + "args": { + "External id": 295542,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368252772.566, "dur": 34.370, + "args": { + "External id": 295543,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5333368252838.025, "dur": 98.773, + "args": { + "External id": 295544,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 2167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368252848.370, "dur": 3.317, + "args": { + "External id": 295545,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5333368252856.990, "dur": 10.109, + "args": { + "External id": 295546,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5333368252861.215, "dur": 5.455, + "args": { + "External id": 295547,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 2170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368252865.059, "dur": 0.561, + "args": { + "External id": 295548,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368252873.622, "dur": 25.127, + "args": { + "External id": 295549,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368252875.398, "dur": 0.441, + "args": { + "External id": 295550,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368252877.869, "dur": 0.303, + "args": { + "External id": 295551,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368252879.309, "dur": 1.513, + "args": { + "External id": 295552,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368252882.474, "dur": 1.864, + "args": { + "External id": 295553,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368252885.615, "dur": 0.601, + "args": { + "External id": 295554,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368252889.202, "dur": 0.385, + "args": { + "External id": 295555,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368252890.714, "dur": 0.445, + "args": { + "External id": 295556,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368252892.748, "dur": 0.579, + "args": { + "External id": 295557,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368252895.215, "dur": 0.250, + "args": { + "External id": 295558,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368252908.500, "dur": 20.370, + "args": { + "External id": 295559,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368252981.842, "dur": 115.041, + "args": { + "External id": 295560,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 2183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368253006.407, "dur": 87.081, + "args": { + "External id": 295561,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2184, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368253015.408, "dur": 73.508, + "args": { + "External id": 295562,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 2185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368253112.078, "dur": 1.837, + "args": { + "External id": 295563,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2186, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368253219.347, "dur": 1743.709, + "args": { + "External id": 295564,"Sequence number": 1209196, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2187 + } + }, + { + "ph": "f", "id": 36, "pid": 2070552, "tid": 2107648, "ts": 5333368253219.347, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368253350.582, "dur": 111.164, + "args": { + "External id": 295565,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 2188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368253505.017, "dur": 43.260, + "args": { + "External id": 295566,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 2189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5333368253564.608, "dur": 48.956, + "args": { + "External id": 295567,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 2190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368253665.371, "dur": 39.700, + "args": { + "External id": 295568,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368253714.802, "dur": 47.480, + "args": { + "External id": 295569,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368253769.397, "dur": 28.371, + "args": { + "External id": 295570,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368253805.943, "dur": 42.886, + "args": { + "External id": 295571,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368253873.454, "dur": 25.390, + "args": { + "External id": 295572,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 2195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368253917.862, "dur": 29.841, + "args": { + "External id": 295573,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368253969.097, "dur": 21.535, + "args": { + "External id": 295574,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368254002.757, "dur": 15.037, + "args": { + "External id": 295575,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368254026.055, "dur": 34.366, + "args": { + "External id": 295576,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368254063.731, "dur": 32.730, + "args": { + "External id": 295577,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5333368254130.176, "dur": 211.290, + "args": { + "External id": 295578,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368254236.800, "dur": 8.949, + "args": { + "External id": 295579,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368254248.174, "dur": 2.568, + "args": { + "External id": 295580,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368254378.844, "dur": 30.518, + "args": { + "External id": 295581,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368254423.531, "dur": 15.053, + "args": { + "External id": 295582,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368254446.912, "dur": 48.714, + "args": { + "External id": 295583,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368254501.684, "dur": 56.662, + "args": { + "External id": 295584,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368254574.436, "dur": 25.725, + "args": { + "External id": 295585,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368254605.110, "dur": 69.152, + "args": { + "External id": 295586,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368254683.437, "dur": 23.659, + "args": { + "External id": 295587,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368254714.008, "dur": 33.508, + "args": { + "External id": 295588,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5333368254769.112, "dur": 24.538, + "args": { + "External id": 295589,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 2212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368254812.175, "dur": 26.455, + "args": { + "External id": 295590,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368254853.010, "dur": 17.655, + "args": { + "External id": 295591,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368254885.964, "dur": 18.020, + "args": { + "External id": 295592,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5333368254915.838, "dur": 16.020, + "args": { + "External id": 295593,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 2216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368255012.664, "dur": 14.389, + "args": { + "External id": 295594,"Record function id": 0, "Ev Idx": 2217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368255015.721, "dur": 10.419, + "args": { + "External id": 295595,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368255019.840, "dur": 5.443, + "args": { + "External id": 295596,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368255021.253, "dur": 3.887, + "args": { + "External id": 295597,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368255030.815, "dur": 4.449, + "args": { + "External id": 295598,"Record function id": 0, "Ev Idx": 2221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368255032.360, "dur": 2.477, + "args": { + "External id": 295599,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368255033.061, "dur": 1.211, + "args": { + "External id": 295600,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368255033.400, "dur": 0.780, + "args": { + "External id": 295601,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368255038.668, "dur": 6.723, + "args": { + "External id": 295602,"Record function id": 0, "Ev Idx": 2225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368255040.067, "dur": 4.925, + "args": { + "External id": 295603,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368255040.842, "dur": 3.617, + "args": { + "External id": 295604,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368255041.301, "dur": 3.071, + "args": { + "External id": 295605,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368255048.581, "dur": 3.450, + "args": { + "External id": 295606,"Record function id": 0, "Ev Idx": 2229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368255049.568, "dur": 2.057, + "args": { + "External id": 295607,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368255050.064, "dur": 1.044, + "args": { + "External id": 295608,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368255050.596, "dur": 0.442, + "args": { + "External id": 295609,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368255055.081, "dur": 3.724, + "args": { + "External id": 295610,"Record function id": 0, "Ev Idx": 2233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368255056.112, "dur": 2.294, + "args": { + "External id": 295611,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368255056.564, "dur": 1.411, + "args": { + "External id": 295612,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368255057.230, "dur": 0.670, + "args": { + "External id": 295613,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368255061.885, "dur": 4.247, + "args": { + "External id": 295614,"Record function id": 0, "Ev Idx": 2237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368255063.299, "dur": 2.358, + "args": { + "External id": 295615,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368255063.955, "dur": 1.306, + "args": { + "External id": 295616,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368255064.531, "dur": 0.654, + "args": { + "External id": 295617,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368255069.258, "dur": 3.880, + "args": { + "External id": 295618,"Record function id": 0, "Ev Idx": 2241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368255070.347, "dur": 2.395, + "args": { + "External id": 295619,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368255070.992, "dur": 1.115, + "args": { + "External id": 295620,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368255071.421, "dur": 0.612, + "args": { + "External id": 295621,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368255076.135, "dur": 4.035, + "args": { + "External id": 295622,"Record function id": 0, "Ev Idx": 2245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368255077.091, "dur": 2.680, + "args": { + "External id": 295623,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368255077.931, "dur": 1.098, + "args": { + "External id": 295624,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368255078.259, "dur": 0.696, + "args": { + "External id": 295625,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368255083.548, "dur": 4.450, + "args": { + "External id": 295626,"Record function id": 0, "Ev Idx": 2249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368255084.512, "dur": 3.080, + "args": { + "External id": 295627,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368255085.233, "dur": 1.829, + "args": { + "External id": 295628,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368255086.044, "dur": 0.944, + "args": { + "External id": 295629,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368255091.920, "dur": 36818.733, + "args": { + "External id": 295630,"Record function id": 0, "Sequence number": 1209195, "Fwd thread id": 1, "Ev Idx": 2253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368255093.623, "dur": 36807.151, + "args": { + "External id": 295631,"Sequence number": 1209195, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2254 + } + }, + { + "ph": "f", "id": 37, "pid": 2070552, "tid": 2107648, "ts": 5333368255093.623, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.17)", "pid": 2070552, "tid": 2107648, + "ts": 5333368255123.291, "dur": 39.962, + "args": { + "External id": 295632,"Record function id": 0, "Ev Idx": 2255 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.17)", "pid": 2070552, "tid": 2107648, + "ts": 5333368255193.630, "dur": 90.601, + "args": { + "External id": 295633,"Record function id": 0, "Ev Idx": 2256 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.17)", "pid": 2070552, "tid": 2107648, + "ts": 5333368255291.163, "dur": 36601.824, + "args": { + "External id": 295634,"Record function id": 0, "Ev Idx": 2257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368255384.071, "dur": 7.615, + "args": { + "External id": 295635,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368255402.194, "dur": 6.862, + "args": { + "External id": 295636,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368255422.972, "dur": 35608.989, + "args": { + "External id": 295637,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368255437.422, "dur": 35584.073, + "args": { + "External id": 295638,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368255475.087, "dur": 14.510, + "args": { + "External id": 295639,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368255495.947, "dur": 35488.094, + "args": { + "External id": 295640,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 2263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368255498.698, "dur": 35484.477, + "args": { + "External id": 295641,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 2264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368255502.780, "dur": 5.020, + "args": { + "External id": 295642,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368255509.471, "dur": 35469.026, + "args": { + "External id": 295643,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 2266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368291132.667, "dur": 10.836, + "args": { + "External id": 295644,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 2267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368291135.858, "dur": 7.272, + "args": { + "External id": 295645,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368291190.812, "dur": 370.327, + "args": { + "External id": 295646,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 2269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368291222.185, "dur": 334.092, + "args": { + "External id": 295647,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2270, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368291236.751, "dur": 313.988, + "args": { + "External id": 295648,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 2271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368291580.113, "dur": 2.102, + "args": { + "External id": 295649,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2272, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368291683.188, "dur": 7.341, + "args": { + "External id": 295650,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368291735.838, "dur": 1.475, + "args": { + "External id": 295651,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368291752.519, "dur": 3.289, + "args": { + "External id": 295652,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368291767.293, "dur": 0.903, + "args": { + "External id": 295653,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368291780.947, "dur": 1.024, + "args": { + "External id": 295654,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368291792.301, "dur": 0.656, + "args": { + "External id": 295655,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368291803.531, "dur": 2.354, + "args": { + "External id": 295656,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368291816.399, "dur": 1.593, + "args": { + "External id": 295657,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368291828.702, "dur": 0.538, + "args": { + "External id": 295658,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368291926.599, "dur": 2849.286, + "args": { + "External id": 295659,"Record function id": 0, "Ev Idx": 2282 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.16)", "pid": 2070552, "tid": 2107648, + "ts": 5333368291947.679, "dur": 1047.087, + "args": { + "External id": 295660,"Record function id": 0, "Ev Idx": 2283 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.16)", "pid": 2070552, "tid": 2107648, + "ts": 5333368291962.507, "dur": 358.185, + "args": { + "External id": 295661,"Record function id": 0, "Ev Idx": 2284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368292047.389, "dur": 3.942, + "args": { + "External id": 295662,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368292055.077, "dur": 0.947, + "args": { + "External id": 295663,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368292057.857, "dur": 2.921, + "args": { + "External id": 295664,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368292062.686, "dur": 0.656, + "args": { + "External id": 295665,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368292065.017, "dur": 0.772, + "args": { + "External id": 295666,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368292067.665, "dur": 0.741, + "args": { + "External id": 295667,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368292070.082, "dur": 1.763, + "args": { + "External id": 295668,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368292073.788, "dur": 0.891, + "args": { + "External id": 295669,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368292076.049, "dur": 0.754, + "args": { + "External id": 295670,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368292078.862, "dur": 0.558, + "args": { + "External id": 295671,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368292099.033, "dur": 185.280, + "args": { + "External id": 295672,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368292114.497, "dur": 164.039, + "args": { + "External id": 295673,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368292127.386, "dur": 14.579, + "args": { + "External id": 295674,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368292145.666, "dur": 100.863, + "args": { + "External id": 295675,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 2298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368292148.422, "dur": 97.710, + "args": { + "External id": 295676,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 2299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368292151.985, "dur": 5.629, + "args": { + "External id": 295677,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368292163.437, "dur": 81.437, + "args": { + "External id": 295678,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 2301 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.15", "pid": 2070552, "tid": 2107648, + "ts": 5333368292416.792, "dur": 570.725, + "args": { + "External id": 295679,"Record function id": 0, "Ev Idx": 2302 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.15)", "pid": 2070552, "tid": 2107648, + "ts": 5333368292435.238, "dur": 539.962, + "args": { + "External id": 295680,"Record function id": 0, "Ev Idx": 2303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368292501.803, "dur": 6.071, + "args": { + "External id": 295681,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368292524.182, "dur": 24.653, + "args": { + "External id": 295682,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368292529.179, "dur": 1.594, + "args": { + "External id": 295683,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368292532.386, "dur": 0.509, + "args": { + "External id": 295684,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368292533.922, "dur": 0.396, + "args": { + "External id": 295685,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368292535.503, "dur": 0.218, + "args": { + "External id": 295686,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368292536.557, "dur": 0.589, + "args": { + "External id": 295687,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368292538.213, "dur": 2.126, + "args": { + "External id": 295688,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368292541.106, "dur": 0.455, + "args": { + "External id": 295689,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368292542.620, "dur": 0.426, + "args": { + "External id": 295690,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368292544.044, "dur": 0.354, + "args": { + "External id": 295691,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368292558.265, "dur": 31.564, + "args": { + "External id": 295692,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5333368292656.378, "dur": 97.942, + "args": { + "External id": 295693,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 2316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368292668.997, "dur": 5.069, + "args": { + "External id": 295694,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5333368292679.481, "dur": 10.341, + "args": { + "External id": 295695,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5333368292683.864, "dur": 5.550, + "args": { + "External id": 295696,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 2319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368292687.139, "dur": 0.717, + "args": { + "External id": 295697,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368292697.008, "dur": 18.539, + "args": { + "External id": 295698,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368292699.035, "dur": 0.486, + "args": { + "External id": 295699,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368292700.596, "dur": 0.339, + "args": { + "External id": 295700,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368292701.828, "dur": 2.256, + "args": { + "External id": 295701,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368292705.146, "dur": 0.289, + "args": { + "External id": 295702,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368292706.485, "dur": 0.358, + "args": { + "External id": 295703,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368292707.722, "dur": 0.408, + "args": { + "External id": 295704,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368292709.068, "dur": 0.547, + "args": { + "External id": 295705,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368292710.603, "dur": 0.343, + "args": { + "External id": 295706,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368292711.641, "dur": 0.326, + "args": { + "External id": 295707,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368292725.583, "dur": 20.984, + "args": { + "External id": 295708,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368292800.872, "dur": 107.638, + "args": { + "External id": 295709,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 2332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368292821.261, "dur": 83.849, + "args": { + "External id": 295710,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2333, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368292830.154, "dur": 70.787, + "args": { + "External id": 295711,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 2334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368292925.387, "dur": 1.751, + "args": { + "External id": 295712,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2335, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368293001.959, "dur": 1751.886, + "args": { + "External id": 295713,"Sequence number": 1209194, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2336 + } + }, + { + "ph": "f", "id": 38, "pid": 2070552, "tid": 2107648, "ts": 5333368293001.959, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368293116.115, "dur": 145.556, + "args": { + "External id": 295714,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 2337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368293310.320, "dur": 43.237, + "args": { + "External id": 295715,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 2338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5333368293372.997, "dur": 58.624, + "args": { + "External id": 295716,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 2339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368293443.783, "dur": 37.558, + "args": { + "External id": 295717,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368293487.646, "dur": 49.480, + "args": { + "External id": 295718,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368293543.753, "dur": 32.173, + "args": { + "External id": 295719,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368293582.947, "dur": 83.698, + "args": { + "External id": 295720,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368293696.881, "dur": 25.715, + "args": { + "External id": 295721,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 2344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368293739.991, "dur": 29.539, + "args": { + "External id": 295722,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368293788.854, "dur": 22.750, + "args": { + "External id": 295723,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368293821.089, "dur": 22.827, + "args": { + "External id": 295724,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368293854.892, "dur": 33.721, + "args": { + "External id": 295725,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368293891.985, "dur": 36.916, + "args": { + "External id": 295726,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5333368293958.647, "dur": 172.544, + "args": { + "External id": 295727,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368294037.682, "dur": 7.163, + "args": { + "External id": 295728,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368294046.736, "dur": 2.361, + "args": { + "External id": 295729,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368294163.117, "dur": 50.349, + "args": { + "External id": 295730,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368294230.759, "dur": 18.394, + "args": { + "External id": 295731,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368294259.786, "dur": 42.866, + "args": { + "External id": 295732,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368294308.184, "dur": 56.229, + "args": { + "External id": 295733,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368294380.609, "dur": 29.430, + "args": { + "External id": 295734,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368294414.712, "dur": 33.756, + "args": { + "External id": 295735,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368294453.998, "dur": 23.647, + "args": { + "External id": 295736,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368294484.151, "dur": 28.801, + "args": { + "External id": 295737,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5333368294538.732, "dur": 21.167, + "args": { + "External id": 295738,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 2361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368294576.073, "dur": 22.378, + "args": { + "External id": 295739,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368294610.967, "dur": 50.819, + "args": { + "External id": 295740,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368294680.669, "dur": 14.961, + "args": { + "External id": 295741,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5333368294707.438, "dur": 16.285, + "args": { + "External id": 295742,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 2365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368294799.245, "dur": 14.946, + "args": { + "External id": 295743,"Record function id": 0, "Ev Idx": 2366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368294802.431, "dur": 10.766, + "args": { + "External id": 295744,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368294806.955, "dur": 5.474, + "args": { + "External id": 295745,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368294808.118, "dur": 4.217, + "args": { + "External id": 295746,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368294817.913, "dur": 6.264, + "args": { + "External id": 295747,"Record function id": 0, "Ev Idx": 2370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368294819.064, "dur": 4.692, + "args": { + "External id": 295748,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368294820.006, "dur": 3.196, + "args": { + "External id": 295749,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368294820.527, "dur": 2.587, + "args": { + "External id": 295750,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368294827.374, "dur": 4.449, + "args": { + "External id": 295751,"Record function id": 0, "Ev Idx": 2374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368294828.518, "dur": 2.885, + "args": { + "External id": 295752,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368294829.571, "dur": 1.425, + "args": { + "External id": 295753,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368294830.002, "dur": 0.919, + "args": { + "External id": 295754,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368294834.878, "dur": 5.099, + "args": { + "External id": 295755,"Record function id": 0, "Ev Idx": 2378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368294836.481, "dur": 3.107, + "args": { + "External id": 295756,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368294837.141, "dur": 1.704, + "args": { + "External id": 295757,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368294837.828, "dur": 0.941, + "args": { + "External id": 295758,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368294842.963, "dur": 3.963, + "args": { + "External id": 295759,"Record function id": 0, "Ev Idx": 2382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368294844.101, "dur": 2.412, + "args": { + "External id": 295760,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368294844.714, "dur": 1.095, + "args": { + "External id": 295761,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368294845.131, "dur": 0.602, + "args": { + "External id": 295762,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368294849.963, "dur": 3.805, + "args": { + "External id": 295763,"Record function id": 0, "Ev Idx": 2386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368294851.073, "dur": 2.286, + "args": { + "External id": 295764,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368294851.675, "dur": 1.274, + "args": { + "External id": 295765,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368294852.256, "dur": 0.618, + "args": { + "External id": 295766,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368294856.834, "dur": 4.059, + "args": { + "External id": 295767,"Record function id": 0, "Ev Idx": 2390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368294857.902, "dur": 2.576, + "args": { + "External id": 295768,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368294858.362, "dur": 1.491, + "args": { + "External id": 295769,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368294858.843, "dur": 0.934, + "args": { + "External id": 295770,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368294863.947, "dur": 3.678, + "args": { + "External id": 295771,"Record function id": 0, "Ev Idx": 2394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368294865.012, "dur": 2.194, + "args": { + "External id": 295772,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368294865.520, "dur": 1.278, + "args": { + "External id": 295773,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368294866.057, "dur": 0.675, + "args": { + "External id": 295774,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368294870.577, "dur": 6.044, + "args": { + "External id": 295775,"Record function id": 0, "Ev Idx": 2398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368294871.552, "dur": 4.675, + "args": { + "External id": 295776,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368294872.391, "dur": 3.455, + "args": { + "External id": 295777,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368294872.803, "dur": 2.969, + "args": { + "External id": 295778,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368294880.789, "dur": 36726.230, + "args": { + "External id": 295779,"Record function id": 0, "Sequence number": 1209193, "Fwd thread id": 1, "Ev Idx": 2402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368294882.093, "dur": 36715.907, + "args": { + "External id": 295780,"Sequence number": 1209193, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2403 + } + }, + { + "ph": "f", "id": 39, "pid": 2070552, "tid": 2107648, "ts": 5333368294882.093, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.16)", "pid": 2070552, "tid": 2107648, + "ts": 5333368294912.257, "dur": 41.627, + "args": { + "External id": 295781,"Record function id": 0, "Ev Idx": 2404 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.16)", "pid": 2070552, "tid": 2107648, + "ts": 5333368294961.619, "dur": 67.965, + "args": { + "External id": 295782,"Record function id": 0, "Ev Idx": 2405 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.16)", "pid": 2070552, "tid": 2107648, + "ts": 5333368295035.581, "dur": 36554.501, + "args": { + "External id": 295783,"Record function id": 0, "Ev Idx": 2406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368295129.253, "dur": 6.974, + "args": { + "External id": 295784,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368295145.821, "dur": 4.804, + "args": { + "External id": 295785,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368295164.213, "dur": 35597.064, + "args": { + "External id": 295786,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368295203.016, "dur": 35546.883, + "args": { + "External id": 295787,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368295247.334, "dur": 18.771, + "args": { + "External id": 295788,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368295274.074, "dur": 35430.886, + "args": { + "External id": 295789,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 2412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368295277.054, "dur": 35427.160, + "args": { + "External id": 295790,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 2413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368295281.906, "dur": 6.719, + "args": { + "External id": 295791,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368295290.490, "dur": 35409.411, + "args": { + "External id": 295792,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 2415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368330862.680, "dur": 10.142, + "args": { + "External id": 295793,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 2416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368330865.797, "dur": 6.664, + "args": { + "External id": 295794,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368330902.700, "dur": 398.043, + "args": { + "External id": 295795,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 2418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368330926.779, "dur": 368.283, + "args": { + "External id": 295796,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2419, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368330937.586, "dur": 351.057, + "args": { + "External id": 295797,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 2420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368331324.159, "dur": 2.312, + "args": { + "External id": 295798,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2421, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368331392.142, "dur": 6.999, + "args": { + "External id": 295799,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368331443.203, "dur": 3.244, + "args": { + "External id": 295800,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368331461.118, "dur": 1.348, + "args": { + "External id": 295801,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368331473.615, "dur": 1.143, + "args": { + "External id": 295802,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368331484.418, "dur": 1.003, + "args": { + "External id": 295803,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368331495.445, "dur": 2.284, + "args": { + "External id": 295804,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368331507.468, "dur": 0.824, + "args": { + "External id": 295805,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368331518.372, "dur": 1.695, + "args": { + "External id": 295806,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368331529.277, "dur": 0.732, + "args": { + "External id": 295807,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368331658.415, "dur": 2840.803, + "args": { + "External id": 295808,"Record function id": 0, "Ev Idx": 2431 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.15)", "pid": 2070552, "tid": 2107648, + "ts": 5333368331682.247, "dur": 1064.383, + "args": { + "External id": 295809,"Record function id": 0, "Ev Idx": 2432 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.15)", "pid": 2070552, "tid": 2107648, + "ts": 5333368331700.064, "dur": 320.776, + "args": { + "External id": 295810,"Record function id": 0, "Ev Idx": 2433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368331790.578, "dur": 6.261, + "args": { + "External id": 295811,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368331799.945, "dur": 0.811, + "args": { + "External id": 295812,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368331802.495, "dur": 0.677, + "args": { + "External id": 295813,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368331805.014, "dur": 0.561, + "args": { + "External id": 295814,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368331806.929, "dur": 0.784, + "args": { + "External id": 295815,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368331809.024, "dur": 0.559, + "args": { + "External id": 295816,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368331811.108, "dur": 1.681, + "args": { + "External id": 295817,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368331814.314, "dur": 0.769, + "args": { + "External id": 295818,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368331816.592, "dur": 2.646, + "args": { + "External id": 295819,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368331820.800, "dur": 0.615, + "args": { + "External id": 295820,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368331840.093, "dur": 151.345, + "args": { + "External id": 295821,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368331856.736, "dur": 130.161, + "args": { + "External id": 295822,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368331869.849, "dur": 13.203, + "args": { + "External id": 295823,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368331887.040, "dur": 69.896, + "args": { + "External id": 295824,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 2447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368331889.800, "dur": 66.888, + "args": { + "External id": 295825,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 2448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368331893.936, "dur": 5.716, + "args": { + "External id": 295826,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368331901.262, "dur": 54.804, + "args": { + "External id": 295827,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 2450 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.14", "pid": 2070552, "tid": 2107648, + "ts": 5333368332109.722, "dur": 629.059, + "args": { + "External id": 295828,"Record function id": 0, "Ev Idx": 2451 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.14)", "pid": 2070552, "tid": 2107648, + "ts": 5333368332126.845, "dur": 599.125, + "args": { + "External id": 295829,"Record function id": 0, "Ev Idx": 2452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368332215.861, "dur": 8.340, + "args": { + "External id": 295830,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368332242.393, "dur": 29.027, + "args": { + "External id": 295831,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368332247.950, "dur": 1.969, + "args": { + "External id": 295832,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368332251.541, "dur": 0.929, + "args": { + "External id": 295833,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368332253.175, "dur": 0.657, + "args": { + "External id": 295834,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368332254.951, "dur": 1.518, + "args": { + "External id": 295835,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368332258.354, "dur": 0.467, + "args": { + "External id": 295836,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368332260.231, "dur": 0.379, + "args": { + "External id": 295837,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368332262.362, "dur": 0.550, + "args": { + "External id": 295838,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368332263.976, "dur": 0.382, + "args": { + "External id": 295839,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368332266.057, "dur": 0.346, + "args": { + "External id": 295840,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368332283.088, "dur": 38.079, + "args": { + "External id": 295841,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5333368332354.805, "dur": 99.917, + "args": { + "External id": 295842,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 2465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368332365.368, "dur": 3.196, + "args": { + "External id": 295843,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5333368332373.835, "dur": 10.322, + "args": { + "External id": 295844,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5333368332378.242, "dur": 5.488, + "args": { + "External id": 295845,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 2468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368332381.733, "dur": 0.664, + "args": { + "External id": 295846,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368332390.454, "dur": 25.268, + "args": { + "External id": 295847,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368332392.643, "dur": 2.604, + "args": { + "External id": 295848,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368332396.656, "dur": 0.543, + "args": { + "External id": 295849,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368332399.007, "dur": 0.395, + "args": { + "External id": 295850,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368332401.387, "dur": 0.751, + "args": { + "External id": 295851,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368332403.393, "dur": 0.351, + "args": { + "External id": 295852,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368332405.034, "dur": 0.239, + "args": { + "External id": 295853,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368332406.957, "dur": 0.536, + "args": { + "External id": 295854,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368332408.818, "dur": 0.337, + "args": { + "External id": 295855,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368332410.502, "dur": 1.727, + "args": { + "External id": 295856,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368332426.061, "dur": 21.505, + "args": { + "External id": 295857,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368332501.142, "dur": 113.768, + "args": { + "External id": 295858,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 2481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368332525.066, "dur": 86.416, + "args": { + "External id": 295859,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2482, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368332534.080, "dur": 73.426, + "args": { + "External id": 295860,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 2483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368332669.456, "dur": 2.740, + "args": { + "External id": 295861,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2484, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368332754.766, "dur": 1718.673, + "args": { + "External id": 295862,"Sequence number": 1209192, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2485 + } + }, + { + "ph": "f", "id": 40, "pid": 2070552, "tid": 2107648, "ts": 5333368332754.766, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368332869.236, "dur": 110.588, + "args": { + "External id": 295863,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 2486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368333019.401, "dur": 41.984, + "args": { + "External id": 295864,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 2487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5333368333079.767, "dur": 50.418, + "args": { + "External id": 295865,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 2488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368333140.789, "dur": 56.152, + "args": { + "External id": 295866,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368333209.466, "dur": 66.496, + "args": { + "External id": 295867,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368333286.377, "dur": 36.095, + "args": { + "External id": 295868,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368333332.732, "dur": 45.772, + "args": { + "External id": 295869,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368333405.256, "dur": 25.535, + "args": { + "External id": 295870,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 2493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368333452.659, "dur": 32.436, + "args": { + "External id": 295871,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368333506.458, "dur": 19.639, + "args": { + "External id": 295872,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368333540.659, "dur": 15.832, + "args": { + "External id": 295873,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368333565.512, "dur": 33.324, + "args": { + "External id": 295874,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368333601.976, "dur": 72.076, + "args": { + "External id": 295875,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5333368333707.890, "dur": 171.564, + "args": { + "External id": 295876,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368333784.370, "dur": 6.498, + "args": { + "External id": 295877,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368333793.241, "dur": 3.203, + "args": { + "External id": 295878,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368333913.351, "dur": 26.863, + "args": { + "External id": 295879,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368333951.438, "dur": 15.350, + "args": { + "External id": 295880,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368333975.159, "dur": 55.214, + "args": { + "External id": 295881,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368334043.874, "dur": 43.114, + "args": { + "External id": 295882,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368334097.678, "dur": 20.911, + "args": { + "External id": 295883,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368334122.972, "dur": 29.862, + "args": { + "External id": 295884,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368334161.484, "dur": 48.645, + "args": { + "External id": 295885,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368334227.711, "dur": 38.788, + "args": { + "External id": 295886,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5333368334291.662, "dur": 25.081, + "args": { + "External id": 295887,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 2510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368334335.215, "dur": 24.177, + "args": { + "External id": 295888,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368334373.136, "dur": 16.295, + "args": { + "External id": 295889,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368334404.031, "dur": 13.990, + "args": { + "External id": 295890,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5333368334430.443, "dur": 15.126, + "args": { + "External id": 295891,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 2514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368334522.959, "dur": 16.374, + "args": { + "External id": 295892,"Record function id": 0, "Ev Idx": 2515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368334526.470, "dur": 11.885, + "args": { + "External id": 295893,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368334531.515, "dur": 6.017, + "args": { + "External id": 295894,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368334533.202, "dur": 4.246, + "args": { + "External id": 295895,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368334543.185, "dur": 4.663, + "args": { + "External id": 295896,"Record function id": 0, "Ev Idx": 2519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368334544.431, "dur": 2.983, + "args": { + "External id": 295897,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368334545.321, "dur": 1.646, + "args": { + "External id": 295898,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368334545.874, "dur": 1.020, + "args": { + "External id": 295899,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368334551.005, "dur": 6.742, + "args": { + "External id": 295900,"Record function id": 0, "Ev Idx": 2523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368334552.777, "dur": 4.538, + "args": { + "External id": 295901,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368334553.697, "dur": 3.227, + "args": { + "External id": 295902,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368334554.279, "dur": 2.541, + "args": { + "External id": 295903,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368334560.848, "dur": 4.673, + "args": { + "External id": 295904,"Record function id": 0, "Ev Idx": 2527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368334562.569, "dur": 2.529, + "args": { + "External id": 295905,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368334563.218, "dur": 1.455, + "args": { + "External id": 295906,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368334563.689, "dur": 0.893, + "args": { + "External id": 295907,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368334568.493, "dur": 4.101, + "args": { + "External id": 295908,"Record function id": 0, "Ev Idx": 2531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368334569.602, "dur": 2.564, + "args": { + "External id": 295909,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368334570.308, "dur": 1.445, + "args": { + "External id": 295910,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368334571.044, "dur": 0.603, + "args": { + "External id": 295911,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368334575.545, "dur": 7.190, + "args": { + "External id": 295912,"Record function id": 0, "Ev Idx": 2535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368334580.084, "dur": 2.222, + "args": { + "External id": 295913,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368334580.543, "dur": 1.235, + "args": { + "External id": 295914,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368334581.120, "dur": 0.584, + "args": { + "External id": 295915,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368334586.121, "dur": 3.937, + "args": { + "External id": 295916,"Record function id": 0, "Ev Idx": 2539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368334587.367, "dur": 2.290, + "args": { + "External id": 295917,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368334588.071, "dur": 1.174, + "args": { + "External id": 295918,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368334588.412, "dur": 0.758, + "args": { + "External id": 295919,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368334593.211, "dur": 4.075, + "args": { + "External id": 295920,"Record function id": 0, "Ev Idx": 2543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368334594.665, "dur": 2.203, + "args": { + "External id": 295921,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368334595.112, "dur": 1.369, + "args": { + "External id": 295922,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368334595.666, "dur": 0.743, + "args": { + "External id": 295923,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368334600.277, "dur": 4.593, + "args": { + "External id": 295924,"Record function id": 0, "Ev Idx": 2547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368334601.930, "dur": 2.549, + "args": { + "External id": 295925,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368334602.665, "dur": 1.426, + "args": { + "External id": 295926,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368334603.362, "dur": 0.653, + "args": { + "External id": 295927,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368334608.925, "dur": 38870.184, + "args": { + "External id": 295928,"Record function id": 0, "Sequence number": 1209191, "Fwd thread id": 1, "Ev Idx": 2551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368334610.193, "dur": 38859.640, + "args": { + "External id": 295929,"Sequence number": 1209191, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2552 + } + }, + { + "ph": "f", "id": 41, "pid": 2070552, "tid": 2107648, "ts": 5333368334610.193, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.15)", "pid": 2070552, "tid": 2107648, + "ts": 5333368334678.279, "dur": 40.334, + "args": { + "External id": 295930,"Record function id": 0, "Ev Idx": 2553 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.15)", "pid": 2070552, "tid": 2107648, + "ts": 5333368334726.732, "dur": 67.767, + "args": { + "External id": 295931,"Record function id": 0, "Ev Idx": 2554 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.15)", "pid": 2070552, "tid": 2107648, + "ts": 5333368334800.419, "dur": 38660.239, + "args": { + "External id": 295932,"Record function id": 0, "Ev Idx": 2555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368334902.419, "dur": 7.943, + "args": { + "External id": 295933,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368334920.454, "dur": 7.225, + "args": { + "External id": 295934,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368334941.699, "dur": 37608.024, + "args": { + "External id": 295935,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368334954.578, "dur": 37584.480, + "args": { + "External id": 295936,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368334999.796, "dur": 14.373, + "args": { + "External id": 295937,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368335020.302, "dur": 37480.734, + "args": { + "External id": 295938,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 2561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368335023.014, "dur": 37477.146, + "args": { + "External id": 295939,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 2562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368335027.010, "dur": 4.920, + "args": { + "External id": 295940,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368335033.569, "dur": 37462.298, + "args": { + "External id": 295941,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 2564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368372678.534, "dur": 10.498, + "args": { + "External id": 295942,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 2565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368372681.628, "dur": 6.825, + "args": { + "External id": 295943,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368372719.789, "dur": 395.578, + "args": { + "External id": 295944,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 2567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368372745.014, "dur": 365.427, + "args": { + "External id": 295945,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2568, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368372756.267, "dur": 348.829, + "args": { + "External id": 295946,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 2569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368373134.587, "dur": 2.383, + "args": { + "External id": 295947,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2570, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368373225.902, "dur": 7.991, + "args": { + "External id": 295948,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368373284.500, "dur": 1.843, + "args": { + "External id": 295949,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368373305.306, "dur": 1.930, + "args": { + "External id": 295950,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368373324.985, "dur": 1.330, + "args": { + "External id": 295951,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368373342.353, "dur": 1.118, + "args": { + "External id": 295952,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368373355.064, "dur": 0.722, + "args": { + "External id": 295953,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368373367.668, "dur": 1.177, + "args": { + "External id": 295954,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368373381.426, "dur": 2.335, + "args": { + "External id": 295955,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368373393.887, "dur": 0.843, + "args": { + "External id": 295956,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368373495.771, "dur": 2873.891, + "args": { + "External id": 295957,"Record function id": 0, "Ev Idx": 2580 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.14)", "pid": 2070552, "tid": 2107648, + "ts": 5333368373516.616, "dur": 1080.761, + "args": { + "External id": 295958,"Record function id": 0, "Ev Idx": 2581 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.14)", "pid": 2070552, "tid": 2107648, + "ts": 5333368373531.250, "dur": 366.099, + "args": { + "External id": 295959,"Record function id": 0, "Ev Idx": 2582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368373655.264, "dur": 5.302, + "args": { + "External id": 295960,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368373666.204, "dur": 1.162, + "args": { + "External id": 295961,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368373669.521, "dur": 0.671, + "args": { + "External id": 295962,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368373671.914, "dur": 2.720, + "args": { + "External id": 295963,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368373676.529, "dur": 0.750, + "args": { + "External id": 295964,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368373679.144, "dur": 0.622, + "args": { + "External id": 295965,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368373681.612, "dur": 1.567, + "args": { + "External id": 295966,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368373684.637, "dur": 0.649, + "args": { + "External id": 295967,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368373686.883, "dur": 0.592, + "args": { + "External id": 295968,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368373689.311, "dur": 0.760, + "args": { + "External id": 295969,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368373711.430, "dur": 157.010, + "args": { + "External id": 295970,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368373728.658, "dur": 135.133, + "args": { + "External id": 295971,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368373745.634, "dur": 13.010, + "args": { + "External id": 295972,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368373762.513, "dur": 72.388, + "args": { + "External id": 295973,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 2596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368373765.188, "dur": 69.434, + "args": { + "External id": 295974,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 2597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368373769.018, "dur": 7.294, + "args": { + "External id": 295975,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368373778.106, "dur": 55.936, + "args": { + "External id": 295976,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 2599 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.13", "pid": 2070552, "tid": 2107648, + "ts": 5333368373995.699, "dur": 593.270, + "args": { + "External id": 295977,"Record function id": 0, "Ev Idx": 2600 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.13)", "pid": 2070552, "tid": 2107648, + "ts": 5333368374014.507, "dur": 561.923, + "args": { + "External id": 295978,"Record function id": 0, "Ev Idx": 2601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368374078.706, "dur": 4.454, + "args": { + "External id": 295979,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368374099.529, "dur": 26.986, + "args": { + "External id": 295980,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368374104.269, "dur": 1.653, + "args": { + "External id": 295981,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368374107.970, "dur": 0.559, + "args": { + "External id": 295982,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368374109.612, "dur": 0.349, + "args": { + "External id": 295983,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368374110.944, "dur": 0.689, + "args": { + "External id": 295984,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368374113.291, "dur": 0.527, + "args": { + "External id": 295985,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368374114.757, "dur": 0.734, + "args": { + "External id": 295986,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368374116.862, "dur": 2.132, + "args": { + "External id": 295987,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368374120.163, "dur": 0.607, + "args": { + "External id": 295988,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368374122.024, "dur": 0.506, + "args": { + "External id": 295989,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368374137.075, "dur": 51.122, + "args": { + "External id": 295990,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5333368374229.016, "dur": 111.979, + "args": { + "External id": 295991,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 2614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368374241.796, "dur": 6.560, + "args": { + "External id": 295992,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5333368374255.379, "dur": 11.281, + "args": { + "External id": 295993,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5333368374260.020, "dur": 6.227, + "args": { + "External id": 295994,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 2617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368374263.707, "dur": 0.955, + "args": { + "External id": 295995,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368374273.819, "dur": 23.664, + "args": { + "External id": 295996,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368374275.697, "dur": 0.630, + "args": { + "External id": 295997,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368374278.024, "dur": 0.647, + "args": { + "External id": 295998,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368374280.257, "dur": 0.719, + "args": { + "External id": 295999,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368374282.297, "dur": 2.511, + "args": { + "External id": 296000,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368374286.019, "dur": 0.366, + "args": { + "External id": 296001,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368374287.791, "dur": 0.438, + "args": { + "External id": 296002,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368374289.485, "dur": 0.520, + "args": { + "External id": 296003,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368374291.694, "dur": 0.579, + "args": { + "External id": 296004,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368374293.593, "dur": 0.640, + "args": { + "External id": 296005,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368374309.209, "dur": 24.161, + "args": { + "External id": 296006,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368374388.356, "dur": 118.042, + "args": { + "External id": 296007,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 2630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368374414.046, "dur": 88.780, + "args": { + "External id": 296008,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2631, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368374423.426, "dur": 74.681, + "args": { + "External id": 296009,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 2632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368374521.501, "dur": 1.695, + "args": { + "External id": 296010,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2633, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368374604.611, "dur": 1742.827, + "args": { + "External id": 296011,"Sequence number": 1209190, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2634 + } + }, + { + "ph": "f", "id": 42, "pid": 2070552, "tid": 2107648, "ts": 5333368374604.611, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368374761.091, "dur": 111.318, + "args": { + "External id": 296012,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 2635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368374916.504, "dur": 42.624, + "args": { + "External id": 296013,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 2636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5333368374976.123, "dur": 48.696, + "args": { + "External id": 296014,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 2637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368375034.725, "dur": 33.677, + "args": { + "External id": 296015,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368375076.187, "dur": 45.291, + "args": { + "External id": 296016,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368375127.776, "dur": 28.501, + "args": { + "External id": 296017,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368375164.375, "dur": 78.206, + "args": { + "External id": 296018,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368375276.390, "dur": 29.506, + "args": { + "External id": 296019,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 2642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368375325.344, "dur": 30.246, + "args": { + "External id": 296020,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368375375.741, "dur": 18.118, + "args": { + "External id": 296021,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368375406.931, "dur": 13.887, + "args": { + "External id": 296022,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368375430.522, "dur": 34.784, + "args": { + "External id": 296023,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368375468.258, "dur": 34.168, + "args": { + "External id": 296024,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5333368375534.421, "dur": 223.749, + "args": { + "External id": 296025,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368375616.177, "dur": 48.007, + "args": { + "External id": 296026,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368375667.771, "dur": 5.046, + "args": { + "External id": 296027,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368375792.852, "dur": 26.212, + "args": { + "External id": 296028,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368375830.449, "dur": 15.621, + "args": { + "External id": 296029,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368375854.102, "dur": 56.460, + "args": { + "External id": 296030,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368375921.387, "dur": 44.570, + "args": { + "External id": 296031,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368375973.714, "dur": 25.129, + "args": { + "External id": 296032,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368376004.605, "dur": 29.143, + "args": { + "External id": 296033,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368376039.021, "dur": 21.845, + "args": { + "External id": 296034,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368376067.653, "dur": 30.472, + "args": { + "External id": 296035,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5333368376117.352, "dur": 22.132, + "args": { + "External id": 296036,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 2659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368376156.956, "dur": 49.450, + "args": { + "External id": 296037,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368376232.901, "dur": 20.002, + "args": { + "External id": 296038,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368376269.146, "dur": 14.086, + "args": { + "External id": 296039,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5333368376298.661, "dur": 16.931, + "args": { + "External id": 296040,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 2663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368376393.474, "dur": 15.689, + "args": { + "External id": 296041,"Record function id": 0, "Ev Idx": 2664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368376396.847, "dur": 11.152, + "args": { + "External id": 296042,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368376401.369, "dur": 5.732, + "args": { + "External id": 296043,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368376402.595, "dur": 4.377, + "args": { + "External id": 296044,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368376412.826, "dur": 4.911, + "args": { + "External id": 296045,"Record function id": 0, "Ev Idx": 2668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368376414.439, "dur": 2.827, + "args": { + "External id": 296046,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368376415.046, "dur": 1.672, + "args": { + "External id": 296047,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368376415.553, "dur": 1.060, + "args": { + "External id": 296048,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368376420.925, "dur": 4.639, + "args": { + "External id": 296049,"Record function id": 0, "Ev Idx": 2672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368376422.393, "dur": 2.772, + "args": { + "External id": 296050,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368376423.038, "dur": 1.715, + "args": { + "External id": 296051,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368376423.605, "dur": 1.060, + "args": { + "External id": 296052,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368376428.662, "dur": 4.091, + "args": { + "External id": 296053,"Record function id": 0, "Ev Idx": 2676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368376430.115, "dur": 2.224, + "args": { + "External id": 296054,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368376430.933, "dur": 0.985, + "args": { + "External id": 296055,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368376431.209, "dur": 0.621, + "args": { + "External id": 296056,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368376435.775, "dur": 4.074, + "args": { + "External id": 296057,"Record function id": 0, "Ev Idx": 2680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368376437.131, "dur": 2.209, + "args": { + "External id": 296058,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368376437.625, "dur": 1.304, + "args": { + "External id": 296059,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368376438.226, "dur": 0.626, + "args": { + "External id": 296060,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368376442.942, "dur": 4.496, + "args": { + "External id": 296061,"Record function id": 0, "Ev Idx": 2684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368376444.544, "dur": 2.493, + "args": { + "External id": 296062,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368376445.234, "dur": 1.385, + "args": { + "External id": 296063,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368376445.793, "dur": 0.750, + "args": { + "External id": 296064,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368376450.550, "dur": 5.652, + "args": { + "External id": 296065,"Record function id": 0, "Ev Idx": 2688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368376452.120, "dur": 3.672, + "args": { + "External id": 296066,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368376452.770, "dur": 2.622, + "args": { + "External id": 296067,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368376453.089, "dur": 2.219, + "args": { + "External id": 296068,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368376459.197, "dur": 4.128, + "args": { + "External id": 296069,"Record function id": 0, "Ev Idx": 2692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368376460.567, "dur": 2.368, + "args": { + "External id": 296070,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368376461.050, "dur": 1.470, + "args": { + "External id": 296071,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368376461.574, "dur": 0.873, + "args": { + "External id": 296072,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368376466.276, "dur": 3.831, + "args": { + "External id": 296073,"Record function id": 0, "Ev Idx": 2696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368376467.681, "dur": 2.023, + "args": { + "External id": 296074,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368376468.155, "dur": 1.140, + "args": { + "External id": 296075,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368376468.607, "dur": 0.613, + "args": { + "External id": 296076,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368376473.856, "dur": 36995.867, + "args": { + "External id": 296077,"Record function id": 0, "Sequence number": 1209189, "Fwd thread id": 1, "Ev Idx": 2700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368376475.433, "dur": 36983.947, + "args": { + "External id": 296078,"Sequence number": 1209189, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2701 + } + }, + { + "ph": "f", "id": 43, "pid": 2070552, "tid": 2107648, "ts": 5333368376475.433, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.14)", "pid": 2070552, "tid": 2107648, + "ts": 5333368376507.180, "dur": 41.283, + "args": { + "External id": 296079,"Record function id": 0, "Ev Idx": 2702 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.14)", "pid": 2070552, "tid": 2107648, + "ts": 5333368376556.115, "dur": 109.915, + "args": { + "External id": 296080,"Record function id": 0, "Ev Idx": 2703 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.14)", "pid": 2070552, "tid": 2107648, + "ts": 5333368376674.377, "dur": 36775.962, + "args": { + "External id": 296081,"Record function id": 0, "Ev Idx": 2704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368376768.300, "dur": 7.605, + "args": { + "External id": 296082,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368376786.605, "dur": 5.407, + "args": { + "External id": 296083,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368376809.769, "dur": 35758.055, + "args": { + "External id": 296084,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368376824.570, "dur": 35731.994, + "args": { + "External id": 296085,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368376866.888, "dur": 14.357, + "args": { + "External id": 296086,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368376887.462, "dur": 35628.819, + "args": { + "External id": 296087,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 2710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368376890.035, "dur": 35625.387, + "args": { + "External id": 296088,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 2711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368376894.515, "dur": 5.161, + "args": { + "External id": 296089,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368376901.585, "dur": 35609.680, + "args": { + "External id": 296090,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 2713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368412691.899, "dur": 9.445, + "args": { + "External id": 296091,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 2714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368412694.756, "dur": 6.110, + "args": { + "External id": 296092,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368412731.248, "dur": 354.177, + "args": { + "External id": 296093,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 2716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368412756.956, "dur": 323.648, + "args": { + "External id": 296094,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2717, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368412767.665, "dur": 308.086, + "args": { + "External id": 296095,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 2718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368413104.337, "dur": 2.219, + "args": { + "External id": 296096,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2719, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368413184.132, "dur": 8.219, + "args": { + "External id": 296097,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368413247.413, "dur": 3.352, + "args": { + "External id": 296098,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368413271.766, "dur": 1.487, + "args": { + "External id": 296099,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368413290.325, "dur": 1.069, + "args": { + "External id": 296100,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368413307.131, "dur": 0.987, + "args": { + "External id": 296101,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368413322.474, "dur": 3.515, + "args": { + "External id": 296102,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368413340.679, "dur": 1.430, + "args": { + "External id": 296103,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368413357.516, "dur": 2.165, + "args": { + "External id": 296104,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368413373.380, "dur": 1.157, + "args": { + "External id": 296105,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368413488.984, "dur": 2958.686, + "args": { + "External id": 296106,"Record function id": 0, "Ev Idx": 2729 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.13)", "pid": 2070552, "tid": 2107648, + "ts": 5333368413511.908, "dur": 1105.197, + "args": { + "External id": 296107,"Record function id": 0, "Ev Idx": 2730 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.13)", "pid": 2070552, "tid": 2107648, + "ts": 5333368413529.812, "dur": 387.145, + "args": { + "External id": 296108,"Record function id": 0, "Ev Idx": 2731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368413672.363, "dur": 6.410, + "args": { + "External id": 296109,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368413682.785, "dur": 2.840, + "args": { + "External id": 296110,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368413687.625, "dur": 0.970, + "args": { + "External id": 296111,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368413690.360, "dur": 0.851, + "args": { + "External id": 296112,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368413692.775, "dur": 1.004, + "args": { + "External id": 296113,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368413695.372, "dur": 0.697, + "args": { + "External id": 296114,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368413697.616, "dur": 1.784, + "args": { + "External id": 296115,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368413701.060, "dur": 0.666, + "args": { + "External id": 296116,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368413703.360, "dur": 0.971, + "args": { + "External id": 296117,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368413705.758, "dur": 2.678, + "args": { + "External id": 296118,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368413728.603, "dur": 157.260, + "args": { + "External id": 296119,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368413745.889, "dur": 135.158, + "args": { + "External id": 296120,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368413761.134, "dur": 13.664, + "args": { + "External id": 296121,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368413779.088, "dur": 73.074, + "args": { + "External id": 296122,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 2745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368413781.940, "dur": 69.914, + "args": { + "External id": 296123,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 2746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368413786.018, "dur": 6.585, + "args": { + "External id": 296124,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368413794.212, "dur": 57.023, + "args": { + "External id": 296125,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 2748 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.12", "pid": 2070552, "tid": 2107648, + "ts": 5333368414013.706, "dur": 595.076, + "args": { + "External id": 296126,"Record function id": 0, "Ev Idx": 2749 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.12)", "pid": 2070552, "tid": 2107648, + "ts": 5333368414032.409, "dur": 563.477, + "args": { + "External id": 296127,"Record function id": 0, "Ev Idx": 2750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368414096.987, "dur": 4.583, + "args": { + "External id": 296128,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368414117.630, "dur": 26.099, + "args": { + "External id": 296129,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368414122.132, "dur": 1.887, + "args": { + "External id": 296130,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368414125.688, "dur": 0.403, + "args": { + "External id": 296131,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368414127.377, "dur": 0.429, + "args": { + "External id": 296132,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368414128.769, "dur": 0.465, + "args": { + "External id": 296133,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368414130.466, "dur": 0.855, + "args": { + "External id": 296134,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368414132.317, "dur": 2.377, + "args": { + "External id": 296135,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368414136.254, "dur": 0.807, + "args": { + "External id": 296136,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368414137.894, "dur": 0.351, + "args": { + "External id": 296137,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368414139.552, "dur": 0.422, + "args": { + "External id": 296138,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368414153.464, "dur": 58.709, + "args": { + "External id": 296139,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5333368414254.969, "dur": 108.203, + "args": { + "External id": 296140,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 2763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368414267.168, "dur": 5.567, + "args": { + "External id": 296141,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5333368414277.714, "dur": 10.158, + "args": { + "External id": 296142,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5333368414281.845, "dur": 5.618, + "args": { + "External id": 296143,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 2766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368414285.266, "dur": 0.649, + "args": { + "External id": 296144,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368414295.374, "dur": 25.229, + "args": { + "External id": 296145,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368414297.614, "dur": 1.013, + "args": { + "External id": 296146,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368414300.176, "dur": 0.510, + "args": { + "External id": 296147,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368414301.728, "dur": 2.231, + "args": { + "External id": 296148,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368414304.919, "dur": 0.496, + "args": { + "External id": 296149,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368414306.551, "dur": 0.821, + "args": { + "External id": 296150,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368414308.447, "dur": 0.379, + "args": { + "External id": 296151,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368414309.645, "dur": 0.778, + "args": { + "External id": 296152,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368414311.701, "dur": 3.557, + "args": { + "External id": 296153,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368414316.681, "dur": 0.517, + "args": { + "External id": 296154,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368414331.837, "dur": 23.620, + "args": { + "External id": 296155,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368414413.959, "dur": 114.734, + "args": { + "External id": 296156,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 2779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368414437.814, "dur": 87.432, + "args": { + "External id": 296157,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2780, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368414447.141, "dur": 73.880, + "args": { + "External id": 296158,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 2781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368414543.528, "dur": 1.755, + "args": { + "External id": 296159,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2782, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368414664.807, "dur": 1761.819, + "args": { + "External id": 296160,"Sequence number": 1209188, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2783 + } + }, + { + "ph": "f", "id": 44, "pid": 2070552, "tid": 2107648, "ts": 5333368414664.807, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368414790.279, "dur": 110.568, + "args": { + "External id": 296161,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 2784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368414942.709, "dur": 42.329, + "args": { + "External id": 296162,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 2785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5333368415001.460, "dur": 50.720, + "args": { + "External id": 296163,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 2786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368415061.979, "dur": 33.754, + "args": { + "External id": 296164,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368415101.273, "dur": 46.392, + "args": { + "External id": 296165,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368415156.329, "dur": 55.529, + "args": { + "External id": 296166,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368415226.069, "dur": 56.798, + "args": { + "External id": 296167,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368415314.405, "dur": 32.807, + "args": { + "External id": 296168,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 2791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368415375.035, "dur": 31.699, + "args": { + "External id": 296169,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368415429.733, "dur": 19.950, + "args": { + "External id": 296170,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368415462.878, "dur": 15.598, + "args": { + "External id": 296171,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368415488.421, "dur": 31.857, + "args": { + "External id": 296172,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368415523.402, "dur": 32.722, + "args": { + "External id": 296173,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5333368415585.783, "dur": 230.848, + "args": { + "External id": 296174,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368415709.543, "dur": 7.595, + "args": { + "External id": 296175,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368415723.255, "dur": 3.598, + "args": { + "External id": 296176,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368415857.173, "dur": 40.467, + "args": { + "External id": 296177,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368415918.063, "dur": 16.512, + "args": { + "External id": 296178,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368415945.391, "dur": 49.619, + "args": { + "External id": 296179,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368416001.721, "dur": 42.350, + "args": { + "External id": 296180,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368416050.353, "dur": 21.289, + "args": { + "External id": 296181,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368416076.080, "dur": 32.758, + "args": { + "External id": 296182,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368416113.904, "dur": 20.828, + "args": { + "External id": 296183,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368416146.230, "dur": 49.934, + "args": { + "External id": 296184,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5333368416223.181, "dur": 28.142, + "args": { + "External id": 296185,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 2808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368416270.831, "dur": 26.780, + "args": { + "External id": 296186,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368416315.073, "dur": 20.321, + "args": { + "External id": 296187,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368416351.611, "dur": 14.499, + "args": { + "External id": 296188,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5333368416380.952, "dur": 17.163, + "args": { + "External id": 296189,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 2812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368416471.315, "dur": 15.916, + "args": { + "External id": 296190,"Record function id": 0, "Ev Idx": 2813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368416474.781, "dur": 11.377, + "args": { + "External id": 296191,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368416479.327, "dur": 5.900, + "args": { + "External id": 296192,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368416480.727, "dur": 4.369, + "args": { + "External id": 296193,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368416491.129, "dur": 4.822, + "args": { + "External id": 296194,"Record function id": 0, "Ev Idx": 2817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368416492.498, "dur": 3.010, + "args": { + "External id": 296195,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368416493.529, "dur": 1.474, + "args": { + "External id": 296196,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368416493.891, "dur": 0.996, + "args": { + "External id": 296197,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368416499.141, "dur": 4.841, + "args": { + "External id": 296198,"Record function id": 0, "Ev Idx": 2821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368416501.199, "dur": 2.314, + "args": { + "External id": 296199,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368416501.674, "dur": 1.420, + "args": { + "External id": 296200,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368416502.083, "dur": 0.919, + "args": { + "External id": 296201,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368416507.140, "dur": 6.333, + "args": { + "External id": 296202,"Record function id": 0, "Ev Idx": 2825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368416508.354, "dur": 4.704, + "args": { + "External id": 296203,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368416508.960, "dur": 3.658, + "args": { + "External id": 296204,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368416509.497, "dur": 3.036, + "args": { + "External id": 296205,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368416516.525, "dur": 5.122, + "args": { + "External id": 296206,"Record function id": 0, "Ev Idx": 2829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368416517.829, "dur": 3.354, + "args": { + "External id": 296207,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368416518.577, "dur": 2.054, + "args": { + "External id": 296208,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368416519.551, "dur": 0.995, + "args": { + "External id": 296209,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368416524.780, "dur": 4.069, + "args": { + "External id": 296210,"Record function id": 0, "Ev Idx": 2833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368416526.100, "dur": 2.299, + "args": { + "External id": 296211,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368416526.603, "dur": 1.366, + "args": { + "External id": 296212,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368416527.338, "dur": 0.557, + "args": { + "External id": 296213,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368416532.005, "dur": 4.186, + "args": { + "External id": 296214,"Record function id": 0, "Ev Idx": 2837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368416533.369, "dur": 2.376, + "args": { + "External id": 296215,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368416533.932, "dur": 1.426, + "args": { + "External id": 296216,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368416534.715, "dur": 0.566, + "args": { + "External id": 296217,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368416539.191, "dur": 3.873, + "args": { + "External id": 296218,"Record function id": 0, "Ev Idx": 2841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368416540.492, "dur": 2.150, + "args": { + "External id": 296219,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368416540.967, "dur": 1.275, + "args": { + "External id": 296220,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368416541.555, "dur": 0.616, + "args": { + "External id": 296221,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368416546.052, "dur": 4.378, + "args": { + "External id": 296222,"Record function id": 0, "Ev Idx": 2845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368416547.221, "dur": 2.792, + "args": { + "External id": 296223,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368416547.683, "dur": 1.579, + "args": { + "External id": 296224,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368416548.304, "dur": 0.882, + "args": { + "External id": 296225,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368416554.176, "dur": 36438.150, + "args": { + "External id": 296226,"Record function id": 0, "Sequence number": 1209187, "Fwd thread id": 1, "Ev Idx": 2849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368416555.770, "dur": 36428.002, + "args": { + "External id": 296227,"Sequence number": 1209187, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2850 + } + }, + { + "ph": "f", "id": 45, "pid": 2070552, "tid": 2107648, "ts": 5333368416555.770, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.13)", "pid": 2070552, "tid": 2107648, + "ts": 5333368416586.158, "dur": 78.051, + "args": { + "External id": 296228,"Record function id": 0, "Ev Idx": 2851 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.13)", "pid": 2070552, "tid": 2107648, + "ts": 5333368416674.289, "dur": 74.529, + "args": { + "External id": 296229,"Record function id": 0, "Ev Idx": 2852 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.13)", "pid": 2070552, "tid": 2107648, + "ts": 5333368416755.376, "dur": 36220.091, + "args": { + "External id": 296230,"Record function id": 0, "Ev Idx": 2853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368416850.727, "dur": 7.881, + "args": { + "External id": 296231,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368416868.863, "dur": 5.054, + "args": { + "External id": 296232,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368416888.599, "dur": 35218.076, + "args": { + "External id": 296233,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368416910.406, "dur": 35184.336, + "args": { + "External id": 296234,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 2857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368416948.089, "dur": 14.775, + "args": { + "External id": 296235,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368416969.051, "dur": 35085.291, + "args": { + "External id": 296236,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 2859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368416971.770, "dur": 35081.660, + "args": { + "External id": 296237,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 2860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368416975.803, "dur": 6.753, + "args": { + "External id": 296238,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368416984.609, "dur": 35064.155, + "args": { + "External id": 296239,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 2862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368452230.265, "dur": 11.485, + "args": { + "External id": 296240,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 2863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368452233.385, "dur": 7.867, + "args": { + "External id": 296241,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368452270.971, "dur": 402.914, + "args": { + "External id": 296242,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 2865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368452300.560, "dur": 368.132, + "args": { + "External id": 296243,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2866, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368452311.516, "dur": 351.298, + "args": { + "External id": 296244,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 2867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368452697.754, "dur": 2.505, + "args": { + "External id": 296245,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2868, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368452764.858, "dur": 6.644, + "args": { + "External id": 296246,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368452817.474, "dur": 1.468, + "args": { + "External id": 296247,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368452834.318, "dur": 1.169, + "args": { + "External id": 296248,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368452847.805, "dur": 2.763, + "args": { + "External id": 296249,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368452862.034, "dur": 0.874, + "args": { + "External id": 296250,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368452873.144, "dur": 1.050, + "args": { + "External id": 296251,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368452885.602, "dur": 1.055, + "args": { + "External id": 296252,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368452897.729, "dur": 3.707, + "args": { + "External id": 296253,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368452912.478, "dur": 0.784, + "args": { + "External id": 296254,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368453007.005, "dur": 2914.497, + "args": { + "External id": 296255,"Record function id": 0, "Ev Idx": 2878 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.12)", "pid": 2070552, "tid": 2107648, + "ts": 5333368453027.598, "dur": 1082.724, + "args": { + "External id": 296256,"Record function id": 0, "Ev Idx": 2879 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.12)", "pid": 2070552, "tid": 2107648, + "ts": 5333368453041.963, "dur": 362.990, + "args": { + "External id": 296257,"Record function id": 0, "Ev Idx": 2880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368453129.564, "dur": 4.249, + "args": { + "External id": 296258,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 2881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368453136.946, "dur": 0.777, + "args": { + "External id": 296259,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368453139.897, "dur": 0.762, + "args": { + "External id": 296260,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368453142.639, "dur": 0.931, + "args": { + "External id": 296261,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368453145.116, "dur": 1.703, + "args": { + "External id": 296262,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368453148.435, "dur": 0.877, + "args": { + "External id": 296263,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 2886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368453151.115, "dur": 3.994, + "args": { + "External id": 296264,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 2887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368453156.930, "dur": 0.514, + "args": { + "External id": 296265,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368453159.319, "dur": 1.149, + "args": { + "External id": 296266,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368453162.230, "dur": 0.905, + "args": { + "External id": 296267,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 2890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368453202.665, "dur": 170.235, + "args": { + "External id": 296268,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368453225.020, "dur": 142.865, + "args": { + "External id": 296269,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 2892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368453242.486, "dur": 16.127, + "args": { + "External id": 296270,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368453263.568, "dur": 74.649, + "args": { + "External id": 296271,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 2894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368453266.410, "dur": 71.470, + "args": { + "External id": 296272,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 2895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368453271.079, "dur": 8.595, + "args": { + "External id": 296273,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368453281.778, "dur": 55.599, + "args": { + "External id": 296274,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 2897 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.11", "pid": 2070552, "tid": 2107648, + "ts": 5333368453502.130, "dur": 600.477, + "args": { + "External id": 296275,"Record function id": 0, "Ev Idx": 2898 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.11)", "pid": 2070552, "tid": 2107648, + "ts": 5333368453519.554, "dur": 570.100, + "args": { + "External id": 296276,"Record function id": 0, "Ev Idx": 2899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368453583.535, "dur": 5.012, + "args": { + "External id": 296277,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368453605.863, "dur": 67.911, + "args": { + "External id": 296278,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368453611.198, "dur": 1.571, + "args": { + "External id": 296279,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368453614.671, "dur": 0.933, + "args": { + "External id": 296280,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368453616.905, "dur": 37.791, + "args": { + "External id": 296281,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368453657.639, "dur": 0.580, + "args": { + "External id": 296282,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368453660.024, "dur": 0.341, + "args": { + "External id": 296283,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368453661.476, "dur": 0.410, + "args": { + "External id": 296284,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368453663.322, "dur": 0.668, + "args": { + "External id": 296285,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368453665.697, "dur": 0.706, + "args": { + "External id": 296286,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368453668.446, "dur": 0.556, + "args": { + "External id": 296287,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368453686.307, "dur": 35.678, + "args": { + "External id": 296288,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5333368453754.624, "dur": 105.966, + "args": { + "External id": 296289,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 2912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368453765.422, "dur": 4.382, + "args": { + "External id": 296290,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5333368453774.685, "dur": 11.959, + "args": { + "External id": 296291,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5333368453779.076, "dur": 7.160, + "args": { + "External id": 296292,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 2915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368453782.491, "dur": 2.543, + "args": { + "External id": 296293,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 2916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368453793.866, "dur": 28.454, + "args": { + "External id": 296294,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 2917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368453796.252, "dur": 0.613, + "args": { + "External id": 296295,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368453798.332, "dur": 0.494, + "args": { + "External id": 296296,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368453803.618, "dur": 0.333, + "args": { + "External id": 296297,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368453805.974, "dur": 0.711, + "args": { + "External id": 296298,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368453808.327, "dur": 0.361, + "args": { + "External id": 296299,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368453810.438, "dur": 0.373, + "args": { + "External id": 296300,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368453812.158, "dur": 0.388, + "args": { + "External id": 296301,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368453814.777, "dur": 2.364, + "args": { + "External id": 296302,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368453818.650, "dur": 0.308, + "args": { + "External id": 296303,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 2926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368453833.336, "dur": 19.655, + "args": { + "External id": 296304,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 2927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368453907.793, "dur": 116.038, + "args": { + "External id": 296305,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 2928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368453934.581, "dur": 85.836, + "args": { + "External id": 296306,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 2929, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368453943.699, "dur": 72.063, + "args": { + "External id": 296307,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 2930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368454040.523, "dur": 1.767, + "args": { + "External id": 296308,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 2931, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368454118.173, "dur": 1782.335, + "args": { + "External id": 296309,"Sequence number": 1209186, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2932 + } + }, + { + "ph": "f", "id": 46, "pid": 2070552, "tid": 2107648, "ts": 5333368454118.173, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368454263.501, "dur": 113.747, + "args": { + "External id": 296310,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 2933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368454419.365, "dur": 40.523, + "args": { + "External id": 296311,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 2934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5333368454476.742, "dur": 50.986, + "args": { + "External id": 296312,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 2935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368454539.914, "dur": 32.202, + "args": { + "External id": 296313,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368454578.229, "dur": 86.757, + "args": { + "External id": 296314,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368454676.586, "dur": 33.028, + "args": { + "External id": 296315,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 2938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368454718.531, "dur": 43.138, + "args": { + "External id": 296316,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 2939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368454788.288, "dur": 28.265, + "args": { + "External id": 296317,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 2940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368454841.483, "dur": 31.361, + "args": { + "External id": 296318,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368454894.275, "dur": 19.276, + "args": { + "External id": 296319,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368454926.753, "dur": 15.373, + "args": { + "External id": 296320,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368454950.916, "dur": 30.475, + "args": { + "External id": 296321,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368454984.297, "dur": 34.432, + "args": { + "External id": 296322,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5333368455049.776, "dur": 213.238, + "args": { + "External id": 296323,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 2946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368455129.934, "dur": 7.340, + "args": { + "External id": 296324,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368455139.406, "dur": 3.072, + "args": { + "External id": 296325,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 2948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368455300.892, "dur": 26.681, + "args": { + "External id": 296326,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368455338.744, "dur": 15.200, + "args": { + "External id": 296327,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368455363.037, "dur": 44.687, + "args": { + "External id": 296328,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368455414.180, "dur": 58.896, + "args": { + "External id": 296329,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368455488.135, "dur": 25.620, + "args": { + "External id": 296330,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368455522.497, "dur": 31.078, + "args": { + "External id": 296331,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368455559.696, "dur": 20.157, + "args": { + "External id": 296332,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 2955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368455586.224, "dur": 31.134, + "args": { + "External id": 296333,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 2956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5333368455695.897, "dur": 26.838, + "args": { + "External id": 296334,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 2957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368455745.643, "dur": 25.768, + "args": { + "External id": 296335,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 2958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368455787.329, "dur": 20.549, + "args": { + "External id": 296336,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 2959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368455823.278, "dur": 15.725, + "args": { + "External id": 296337,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 2960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5333368455852.353, "dur": 16.803, + "args": { + "External id": 296338,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 2961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368455946.272, "dur": 15.998, + "args": { + "External id": 296339,"Record function id": 0, "Ev Idx": 2962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368455949.423, "dur": 11.744, + "args": { + "External id": 296340,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368455953.833, "dur": 6.312, + "args": { + "External id": 296341,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368455955.559, "dur": 4.491, + "args": { + "External id": 296342,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368455965.954, "dur": 4.823, + "args": { + "External id": 296343,"Record function id": 0, "Ev Idx": 2966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368455967.480, "dur": 2.817, + "args": { + "External id": 296344,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368455968.275, "dur": 1.569, + "args": { + "External id": 296345,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368455968.656, "dur": 1.087, + "args": { + "External id": 296346,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368455974.056, "dur": 5.032, + "args": { + "External id": 296347,"Record function id": 0, "Ev Idx": 2970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368455975.703, "dur": 2.952, + "args": { + "External id": 296348,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368455976.549, "dur": 1.689, + "args": { + "External id": 296349,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368455977.072, "dur": 1.072, + "args": { + "External id": 296350,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 2973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368455982.198, "dur": 3.785, + "args": { + "External id": 296351,"Record function id": 0, "Ev Idx": 2974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368455983.298, "dur": 2.292, + "args": { + "External id": 296352,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368455983.767, "dur": 1.249, + "args": { + "External id": 296353,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368455984.062, "dur": 0.860, + "args": { + "External id": 296354,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 2977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368455989.124, "dur": 4.768, + "args": { + "External id": 296355,"Record function id": 0, "Ev Idx": 2978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368455990.247, "dur": 3.235, + "args": { + "External id": 296356,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368455990.715, "dur": 2.340, + "args": { + "External id": 296357,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368455991.736, "dur": 1.246, + "args": { + "External id": 296358,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368455996.878, "dur": 4.709, + "args": { + "External id": 296359,"Record function id": 0, "Ev Idx": 2982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368455998.394, "dur": 2.780, + "args": { + "External id": 296360,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368455999.137, "dur": 1.466, + "args": { + "External id": 296361,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368455999.895, "dur": 0.640, + "args": { + "External id": 296362,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368456004.746, "dur": 5.915, + "args": { + "External id": 296363,"Record function id": 0, "Ev Idx": 2986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368456005.809, "dur": 4.433, + "args": { + "External id": 296364,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368456006.261, "dur": 3.533, + "args": { + "External id": 296365,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368456006.821, "dur": 2.886, + "args": { + "External id": 296366,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368456013.623, "dur": 5.053, + "args": { + "External id": 296367,"Record function id": 0, "Ev Idx": 2990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368456014.943, "dur": 3.324, + "args": { + "External id": 296368,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368456015.576, "dur": 2.280, + "args": { + "External id": 296369,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368456016.506, "dur": 1.277, + "args": { + "External id": 296370,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 2993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368456021.634, "dur": 4.688, + "args": { + "External id": 296371,"Record function id": 0, "Ev Idx": 2994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368456023.041, "dur": 2.834, + "args": { + "External id": 296372,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368456023.502, "dur": 1.935, + "args": { + "External id": 296373,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368456024.473, "dur": 0.898, + "args": { + "External id": 296374,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 2997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368456030.590, "dur": 37974.380, + "args": { + "External id": 296375,"Record function id": 0, "Sequence number": 1209185, "Fwd thread id": 1, "Ev Idx": 2998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368456031.937, "dur": 37963.723, + "args": { + "External id": 296376,"Sequence number": 1209185, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 2999 + } + }, + { + "ph": "f", "id": 47, "pid": 2070552, "tid": 2107648, "ts": 5333368456031.937, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.12)", "pid": 2070552, "tid": 2107648, + "ts": 5333368456060.456, "dur": 42.449, + "args": { + "External id": 296377,"Record function id": 0, "Ev Idx": 3000 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.12)", "pid": 2070552, "tid": 2107648, + "ts": 5333368456110.887, "dur": 91.143, + "args": { + "External id": 296378,"Record function id": 0, "Ev Idx": 3001 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.12)", "pid": 2070552, "tid": 2107648, + "ts": 5333368456209.637, "dur": 37777.569, + "args": { + "External id": 296379,"Record function id": 0, "Ev Idx": 3002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368456303.216, "dur": 7.324, + "args": { + "External id": 296380,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368456320.925, "dur": 5.062, + "args": { + "External id": 296381,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368456341.346, "dur": 36689.670, + "args": { + "External id": 296382,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368456355.862, "dur": 36663.603, + "args": { + "External id": 296383,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368456391.807, "dur": 14.021, + "args": { + "External id": 296384,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368456411.848, "dur": 36568.615, + "args": { + "External id": 296385,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 3008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368456414.639, "dur": 36564.934, + "args": { + "External id": 296386,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 3009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368456418.643, "dur": 5.041, + "args": { + "External id": 296387,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368456425.536, "dur": 36549.625, + "args": { + "External id": 296388,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 3011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368493132.393, "dur": 10.416, + "args": { + "External id": 296389,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 3012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368493135.529, "dur": 6.953, + "args": { + "External id": 296390,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368493185.815, "dur": 478.902, + "args": { + "External id": 296391,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 3014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368493219.931, "dur": 397.060, + "args": { + "External id": 296392,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3015, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368493233.409, "dur": 377.419, + "args": { + "External id": 296393,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 3016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368493693.366, "dur": 3.608, + "args": { + "External id": 296394,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3017, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368493771.170, "dur": 7.338, + "args": { + "External id": 296395,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368493829.086, "dur": 1.566, + "args": { + "External id": 296396,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368493846.029, "dur": 1.127, + "args": { + "External id": 296397,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368493861.255, "dur": 0.844, + "args": { + "External id": 296398,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368493872.829, "dur": 0.953, + "args": { + "External id": 296399,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368493883.794, "dur": 1.135, + "args": { + "External id": 296400,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368493895.448, "dur": 1.051, + "args": { + "External id": 296401,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368493909.916, "dur": 2.171, + "args": { + "External id": 296402,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368493922.596, "dur": 1.182, + "args": { + "External id": 296403,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368494021.266, "dur": 2852.991, + "args": { + "External id": 296404,"Record function id": 0, "Ev Idx": 3027 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.11)", "pid": 2070552, "tid": 2107648, + "ts": 5333368494042.624, "dur": 1080.289, + "args": { + "External id": 296405,"Record function id": 0, "Ev Idx": 3028 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.11)", "pid": 2070552, "tid": 2107648, + "ts": 5333368494058.301, "dur": 364.312, + "args": { + "External id": 296406,"Record function id": 0, "Ev Idx": 3029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368494144.099, "dur": 4.046, + "args": { + "External id": 296407,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368494151.816, "dur": 0.704, + "args": { + "External id": 296408,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368494154.154, "dur": 0.723, + "args": { + "External id": 296409,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368494156.568, "dur": 1.182, + "args": { + "External id": 296410,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368494159.404, "dur": 1.017, + "args": { + "External id": 296411,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368494162.405, "dur": 2.863, + "args": { + "External id": 296412,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368494188.023, "dur": 2.810, + "args": { + "External id": 296413,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368494194.013, "dur": 1.314, + "args": { + "External id": 296414,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368494198.138, "dur": 1.183, + "args": { + "External id": 296415,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368494202.031, "dur": 1.140, + "args": { + "External id": 296416,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368494228.873, "dur": 161.424, + "args": { + "External id": 296417,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368494248.773, "dur": 136.687, + "args": { + "External id": 296418,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368494264.460, "dur": 13.901, + "args": { + "External id": 296419,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368494282.223, "dur": 72.261, + "args": { + "External id": 296420,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 3043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368494284.892, "dur": 69.288, + "args": { + "External id": 296421,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 3044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368494289.060, "dur": 6.823, + "args": { + "External id": 296422,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368494297.660, "dur": 56.017, + "args": { + "External id": 296423,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 3046 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.10", "pid": 2070552, "tid": 2107648, + "ts": 5333368494517.781, "dur": 598.302, + "args": { + "External id": 296424,"Record function id": 0, "Ev Idx": 3047 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.10)", "pid": 2070552, "tid": 2107648, + "ts": 5333368494535.819, "dur": 567.792, + "args": { + "External id": 296425,"Record function id": 0, "Ev Idx": 3048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368494600.966, "dur": 4.471, + "args": { + "External id": 296426,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368494664.737, "dur": 24.766, + "args": { + "External id": 296427,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368494669.560, "dur": 2.101, + "args": { + "External id": 296428,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368494673.374, "dur": 2.002, + "args": { + "External id": 296429,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368494676.537, "dur": 0.419, + "args": { + "External id": 296430,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368494678.033, "dur": 0.427, + "args": { + "External id": 296431,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368494679.476, "dur": 0.324, + "args": { + "External id": 296432,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368494680.665, "dur": 0.220, + "args": { + "External id": 296433,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368494682.036, "dur": 0.198, + "args": { + "External id": 296434,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368494683.711, "dur": 0.382, + "args": { + "External id": 296435,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368494685.437, "dur": 0.361, + "args": { + "External id": 296436,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368494701.269, "dur": 34.663, + "args": { + "External id": 296437,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5333368494771.537, "dur": 105.366, + "args": { + "External id": 296438,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 3061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368494782.269, "dur": 4.940, + "args": { + "External id": 296439,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5333368494792.373, "dur": 9.101, + "args": { + "External id": 296440,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5333368494796.418, "dur": 4.670, + "args": { + "External id": 296441,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 3064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368494799.408, "dur": 0.493, + "args": { + "External id": 296442,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368494807.920, "dur": 32.153, + "args": { + "External id": 296443,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368494809.563, "dur": 0.574, + "args": { + "External id": 296444,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368494811.267, "dur": 0.227, + "args": { + "External id": 296445,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368494813.741, "dur": 0.208, + "args": { + "External id": 296446,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368494814.985, "dur": 0.553, + "args": { + "External id": 296447,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368494817.281, "dur": 0.355, + "args": { + "External id": 296448,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368494830.008, "dur": 0.240, + "args": { + "External id": 296449,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368494831.528, "dur": 0.760, + "args": { + "External id": 296450,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368494833.992, "dur": 0.972, + "args": { + "External id": 296451,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368494835.905, "dur": 0.720, + "args": { + "External id": 296452,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368494850.809, "dur": 18.453, + "args": { + "External id": 296453,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368494923.799, "dur": 115.643, + "args": { + "External id": 296454,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 3077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368494948.238, "dur": 87.793, + "args": { + "External id": 296455,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3078, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368494957.688, "dur": 74.370, + "args": { + "External id": 296456,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 3079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368495054.217, "dur": 1.689, + "args": { + "External id": 296457,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3080, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368495131.032, "dur": 1720.320, + "args": { + "External id": 296458,"Sequence number": 1209184, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3081 + } + }, + { + "ph": "f", "id": 48, "pid": 2070552, "tid": 2107648, "ts": 5333368495131.032, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368495291.684, "dur": 115.013, + "args": { + "External id": 296459,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 3082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368495448.509, "dur": 40.087, + "args": { + "External id": 296460,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 3083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5333368495506.585, "dur": 50.595, + "args": { + "External id": 296461,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 3084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368495566.828, "dur": 32.538, + "args": { + "External id": 296462,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368495605.609, "dur": 86.751, + "args": { + "External id": 296463,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368495703.688, "dur": 30.885, + "args": { + "External id": 296464,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368495743.188, "dur": 43.696, + "args": { + "External id": 296465,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368495811.403, "dur": 26.167, + "args": { + "External id": 296466,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 3089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368495855.632, "dur": 33.760, + "args": { + "External id": 296467,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368495910.831, "dur": 19.210, + "args": { + "External id": 296468,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368495945.363, "dur": 14.533, + "args": { + "External id": 296469,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368495968.579, "dur": 30.758, + "args": { + "External id": 296470,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368496002.389, "dur": 36.948, + "args": { + "External id": 296471,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5333368496067.642, "dur": 197.317, + "args": { + "External id": 296472,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368496147.486, "dur": 6.697, + "args": { + "External id": 296473,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368496155.661, "dur": 2.486, + "args": { + "External id": 296474,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368496300.244, "dur": 24.417, + "args": { + "External id": 296475,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368496335.497, "dur": 15.258, + "args": { + "External id": 296476,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368496359.847, "dur": 56.963, + "args": { + "External id": 296477,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368496429.017, "dur": 38.194, + "args": { + "External id": 296478,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368496474.968, "dur": 24.417, + "args": { + "External id": 296479,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368496504.223, "dur": 30.291, + "args": { + "External id": 296480,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368496540.045, "dur": 22.861, + "args": { + "External id": 296481,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368496569.815, "dur": 30.555, + "args": { + "External id": 296482,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5333368496654.857, "dur": 26.224, + "args": { + "External id": 296483,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 3106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368496700.852, "dur": 27.146, + "args": { + "External id": 296484,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368496742.844, "dur": 17.192, + "args": { + "External id": 296485,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368496776.013, "dur": 14.683, + "args": { + "External id": 296486,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5333368496803.207, "dur": 19.914, + "args": { + "External id": 296487,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 3110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368496900.302, "dur": 15.499, + "args": { + "External id": 296488,"Record function id": 0, "Ev Idx": 3111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368496903.536, "dur": 11.182, + "args": { + "External id": 296489,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368496907.942, "dur": 5.784, + "args": { + "External id": 296490,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368496909.114, "dur": 4.520, + "args": { + "External id": 296491,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368496919.736, "dur": 4.586, + "args": { + "External id": 296492,"Record function id": 0, "Ev Idx": 3115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368496920.853, "dur": 3.037, + "args": { + "External id": 296493,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368496921.783, "dur": 1.578, + "args": { + "External id": 296494,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368496922.358, "dur": 0.930, + "args": { + "External id": 296495,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368496927.510, "dur": 4.088, + "args": { + "External id": 296496,"Record function id": 0, "Ev Idx": 3119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368496928.580, "dur": 2.603, + "args": { + "External id": 296497,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368496929.321, "dur": 1.461, + "args": { + "External id": 296498,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368496929.766, "dur": 0.925, + "args": { + "External id": 296499,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368496934.746, "dur": 3.582, + "args": { + "External id": 296500,"Record function id": 0, "Ev Idx": 3123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368496935.807, "dur": 2.118, + "args": { + "External id": 296501,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368496936.410, "dur": 1.085, + "args": { + "External id": 296502,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368496936.681, "dur": 0.724, + "args": { + "External id": 296503,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368496941.434, "dur": 3.889, + "args": { + "External id": 296504,"Record function id": 0, "Ev Idx": 3127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368496942.440, "dur": 2.354, + "args": { + "External id": 296505,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368496942.974, "dur": 1.306, + "args": { + "External id": 296506,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368496943.393, "dur": 0.811, + "args": { + "External id": 296507,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368496948.296, "dur": 4.207, + "args": { + "External id": 296508,"Record function id": 0, "Ev Idx": 3131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368496949.373, "dur": 2.728, + "args": { + "External id": 296509,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368496949.890, "dur": 1.787, + "args": { + "External id": 296510,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368496950.708, "dur": 0.895, + "args": { + "External id": 296511,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368496955.603, "dur": 4.238, + "args": { + "External id": 296512,"Record function id": 0, "Ev Idx": 3135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368496956.848, "dur": 2.570, + "args": { + "External id": 296513,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368496957.355, "dur": 1.657, + "args": { + "External id": 296514,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368496957.972, "dur": 0.964, + "args": { + "External id": 296515,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368496963.221, "dur": 3.971, + "args": { + "External id": 296516,"Record function id": 0, "Ev Idx": 3139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368496964.357, "dur": 2.426, + "args": { + "External id": 296517,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368496964.792, "dur": 1.264, + "args": { + "External id": 296518,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368496965.172, "dur": 0.778, + "args": { + "External id": 296519,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368496970.904, "dur": 3.989, + "args": { + "External id": 296520,"Record function id": 0, "Ev Idx": 3143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368496972.053, "dur": 2.425, + "args": { + "External id": 296521,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368496972.478, "dur": 1.429, + "args": { + "External id": 296522,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368496973.043, "dur": 0.754, + "args": { + "External id": 296523,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368496978.574, "dur": 36910.328, + "args": { + "External id": 296524,"Record function id": 0, "Sequence number": 1209183, "Fwd thread id": 1, "Ev Idx": 3147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368496980.118, "dur": 36898.860, + "args": { + "External id": 296525,"Sequence number": 1209183, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3148 + } + }, + { + "ph": "f", "id": 49, "pid": 2070552, "tid": 2107648, "ts": 5333368496980.118, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.11)", "pid": 2070552, "tid": 2107648, + "ts": 5333368497010.205, "dur": 39.540, + "args": { + "External id": 296526,"Record function id": 0, "Ev Idx": 3149 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.11)", "pid": 2070552, "tid": 2107648, + "ts": 5333368497057.578, "dur": 73.901, + "args": { + "External id": 296527,"Record function id": 0, "Ev Idx": 3150 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.11)", "pid": 2070552, "tid": 2107648, + "ts": 5333368497138.646, "dur": 36731.913, + "args": { + "External id": 296528,"Record function id": 0, "Ev Idx": 3151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368497267.409, "dur": 9.060, + "args": { + "External id": 296529,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368497288.627, "dur": 5.358, + "args": { + "External id": 296530,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368497309.541, "dur": 35687.080, + "args": { + "External id": 296531,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368497324.068, "dur": 35660.659, + "args": { + "External id": 296532,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368497362.309, "dur": 15.002, + "args": { + "External id": 296533,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368497383.465, "dur": 35556.656, + "args": { + "External id": 296534,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 3157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368497386.488, "dur": 35552.340, + "args": { + "External id": 296535,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 3158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368497390.325, "dur": 5.723, + "args": { + "External id": 296536,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368497397.806, "dur": 35536.056, + "args": { + "External id": 296537,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 3160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368533099.414, "dur": 11.049, + "args": { + "External id": 296538,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 3161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368533102.633, "dur": 7.480, + "args": { + "External id": 296539,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368533140.328, "dur": 393.339, + "args": { + "External id": 296540,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 3163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368533181.489, "dur": 346.423, + "args": { + "External id": 296541,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3164, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368533196.529, "dur": 325.944, + "args": { + "External id": 296542,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 3165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368533554.981, "dur": 2.245, + "args": { + "External id": 296543,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3166, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368533654.725, "dur": 7.665, + "args": { + "External id": 296544,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368533711.112, "dur": 1.448, + "args": { + "External id": 296545,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368533727.984, "dur": 1.361, + "args": { + "External id": 296546,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368533741.606, "dur": 0.934, + "args": { + "External id": 296547,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368533754.236, "dur": 0.995, + "args": { + "External id": 296548,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368533766.501, "dur": 0.875, + "args": { + "External id": 296549,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368533779.107, "dur": 0.881, + "args": { + "External id": 296550,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368533791.817, "dur": 2.039, + "args": { + "External id": 296551,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368533804.483, "dur": 0.726, + "args": { + "External id": 296552,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368533905.476, "dur": 2866.826, + "args": { + "External id": 296553,"Record function id": 0, "Ev Idx": 3176 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.10)", "pid": 2070552, "tid": 2107648, + "ts": 5333368533926.273, "dur": 1068.310, + "args": { + "External id": 296554,"Record function id": 0, "Ev Idx": 3177 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.10)", "pid": 2070552, "tid": 2107648, + "ts": 5333368533940.911, "dur": 357.756, + "args": { + "External id": 296555,"Record function id": 0, "Ev Idx": 3178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368534029.560, "dur": 4.223, + "args": { + "External id": 296556,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368534037.218, "dur": 1.121, + "args": { + "External id": 296557,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368534040.126, "dur": 1.047, + "args": { + "External id": 296558,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368534042.930, "dur": 0.829, + "args": { + "External id": 296559,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368534045.678, "dur": 1.122, + "args": { + "External id": 296560,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368534048.789, "dur": 0.849, + "args": { + "External id": 296561,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368534051.467, "dur": 1.922, + "args": { + "External id": 296562,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368534055.311, "dur": 0.981, + "args": { + "External id": 296563,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368534058.127, "dur": 1.369, + "args": { + "External id": 296564,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368534061.411, "dur": 0.792, + "args": { + "External id": 296565,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368534080.996, "dur": 183.722, + "args": { + "External id": 296566,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368534097.248, "dur": 161.737, + "args": { + "External id": 296567,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368534109.895, "dur": 13.542, + "args": { + "External id": 296568,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368534127.427, "dur": 98.585, + "args": { + "External id": 296569,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 3192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368534130.466, "dur": 95.114, + "args": { + "External id": 296570,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 3193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368534134.808, "dur": 6.807, + "args": { + "External id": 296571,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368534143.243, "dur": 80.920, + "args": { + "External id": 296572,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 3195 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.9", "pid": 2070552, "tid": 2107648, + "ts": 5333368534395.897, "dur": 591.046, + "args": { + "External id": 296573,"Record function id": 0, "Ev Idx": 3196 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.9)", "pid": 2070552, "tid": 2107648, + "ts": 5333368534414.071, "dur": 559.990, + "args": { + "External id": 296574,"Record function id": 0, "Ev Idx": 3197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368534480.883, "dur": 6.508, + "args": { + "External id": 296575,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368534502.968, "dur": 28.802, + "args": { + "External id": 296576,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368534508.078, "dur": 1.774, + "args": { + "External id": 296577,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368534512.061, "dur": 1.217, + "args": { + "External id": 296578,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368534514.930, "dur": 0.525, + "args": { + "External id": 296579,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368534516.959, "dur": 0.520, + "args": { + "External id": 296580,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368534519.438, "dur": 0.924, + "args": { + "External id": 296581,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368534521.663, "dur": 0.831, + "args": { + "External id": 296582,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368534523.683, "dur": 0.635, + "args": { + "External id": 296583,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368534525.866, "dur": 0.504, + "args": { + "External id": 296584,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368534527.463, "dur": 0.677, + "args": { + "External id": 296585,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368534542.602, "dur": 30.892, + "args": { + "External id": 296586,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5333368534606.291, "dur": 141.275, + "args": { + "External id": 296587,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 3210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368534616.803, "dur": 41.135, + "args": { + "External id": 296588,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5333368534665.053, "dur": 10.599, + "args": { + "External id": 296589,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5333368534669.062, "dur": 6.152, + "args": { + "External id": 296590,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 3213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368534672.704, "dur": 1.098, + "args": { + "External id": 296591,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368534685.263, "dur": 20.812, + "args": { + "External id": 296592,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368534687.188, "dur": 0.514, + "args": { + "External id": 296593,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368534689.309, "dur": 0.563, + "args": { + "External id": 296594,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368534691.100, "dur": 0.399, + "args": { + "External id": 296595,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368534692.980, "dur": 0.927, + "args": { + "External id": 296596,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368534695.166, "dur": 0.937, + "args": { + "External id": 296597,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368534697.141, "dur": 0.591, + "args": { + "External id": 296598,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368534699.196, "dur": 0.706, + "args": { + "External id": 296599,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368534701.169, "dur": 0.517, + "args": { + "External id": 296600,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368534702.645, "dur": 0.826, + "args": { + "External id": 296601,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368534717.787, "dur": 21.708, + "args": { + "External id": 296602,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368534793.545, "dur": 114.119, + "args": { + "External id": 296603,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 3226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368534817.178, "dur": 87.280, + "args": { + "External id": 296604,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3227, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368534826.077, "dur": 73.324, + "args": { + "External id": 296605,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 3228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368534922.893, "dur": 1.795, + "args": { + "External id": 296606,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3229, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368535001.300, "dur": 1744.986, + "args": { + "External id": 296607,"Sequence number": 1209182, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3230 + } + }, + { + "ph": "f", "id": 50, "pid": 2070552, "tid": 2107648, "ts": 5333368535001.300, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368535114.053, "dur": 144.953, + "args": { + "External id": 296608,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 3231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368535307.602, "dur": 42.991, + "args": { + "External id": 296609,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 3232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5333368535369.545, "dur": 59.788, + "args": { + "External id": 296610,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 3233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368535439.927, "dur": 33.488, + "args": { + "External id": 296611,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368535479.845, "dur": 45.916, + "args": { + "External id": 296612,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368535532.480, "dur": 28.763, + "args": { + "External id": 296613,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368535568.045, "dur": 42.266, + "args": { + "External id": 296614,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368535677.041, "dur": 27.570, + "args": { + "External id": 296615,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 3238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368535725.020, "dur": 29.293, + "args": { + "External id": 296616,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368535776.059, "dur": 19.723, + "args": { + "External id": 296617,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368535808.656, "dur": 16.654, + "args": { + "External id": 296618,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368535834.229, "dur": 34.859, + "args": { + "External id": 296619,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368535872.365, "dur": 34.883, + "args": { + "External id": 296620,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5333368535936.787, "dur": 175.134, + "args": { + "External id": 296621,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368536010.195, "dur": 9.400, + "args": { + "External id": 296622,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368536021.519, "dur": 3.479, + "args": { + "External id": 296623,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368536146.364, "dur": 42.885, + "args": { + "External id": 296624,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368536210.569, "dur": 22.838, + "args": { + "External id": 296625,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368536244.390, "dur": 43.679, + "args": { + "External id": 296626,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368536293.846, "dur": 52.192, + "args": { + "External id": 296627,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368536360.900, "dur": 26.962, + "args": { + "External id": 296628,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368536392.684, "dur": 32.142, + "args": { + "External id": 296629,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368536430.291, "dur": 24.149, + "args": { + "External id": 296630,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368536461.706, "dur": 29.397, + "args": { + "External id": 296631,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5333368536514.362, "dur": 22.634, + "args": { + "External id": 296632,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 3255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368536555.760, "dur": 24.872, + "args": { + "External id": 296633,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368536596.134, "dur": 16.675, + "args": { + "External id": 296634,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368536661.514, "dur": 19.644, + "args": { + "External id": 296635,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5333368536698.989, "dur": 16.810, + "args": { + "External id": 296636,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 3259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368536794.991, "dur": 15.690, + "args": { + "External id": 296637,"Record function id": 0, "Ev Idx": 3260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368536798.383, "dur": 11.171, + "args": { + "External id": 296638,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368536802.838, "dur": 5.908, + "args": { + "External id": 296639,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368536804.070, "dur": 4.584, + "args": { + "External id": 296640,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368536814.419, "dur": 4.196, + "args": { + "External id": 296641,"Record function id": 0, "Ev Idx": 3264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368536815.778, "dur": 2.375, + "args": { + "External id": 296642,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368536816.392, "dur": 1.312, + "args": { + "External id": 296643,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368536816.745, "dur": 0.841, + "args": { + "External id": 296644,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368536821.931, "dur": 4.133, + "args": { + "External id": 296645,"Record function id": 0, "Ev Idx": 3268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368536823.302, "dur": 2.328, + "args": { + "External id": 296646,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368536823.761, "dur": 1.455, + "args": { + "External id": 296647,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368536824.384, "dur": 0.744, + "args": { + "External id": 296648,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368536829.256, "dur": 3.949, + "args": { + "External id": 296649,"Record function id": 0, "Ev Idx": 3272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368536830.539, "dur": 2.246, + "args": { + "External id": 296650,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368536831.087, "dur": 1.281, + "args": { + "External id": 296651,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368536831.562, "dur": 0.731, + "args": { + "External id": 296652,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368536836.295, "dur": 3.872, + "args": { + "External id": 296653,"Record function id": 0, "Ev Idx": 3276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368536837.488, "dur": 2.268, + "args": { + "External id": 296654,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368536837.968, "dur": 1.393, + "args": { + "External id": 296655,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368536838.399, "dur": 0.896, + "args": { + "External id": 296656,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368536843.175, "dur": 3.972, + "args": { + "External id": 296657,"Record function id": 0, "Ev Idx": 3280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368536844.611, "dur": 2.121, + "args": { + "External id": 296658,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368536845.045, "dur": 1.262, + "args": { + "External id": 296659,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368536845.424, "dur": 0.810, + "args": { + "External id": 296660,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368536853.227, "dur": 4.677, + "args": { + "External id": 296661,"Record function id": 0, "Ev Idx": 3284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368536854.395, "dur": 3.061, + "args": { + "External id": 296662,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368536855.056, "dur": 2.011, + "args": { + "External id": 296663,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368536856.067, "dur": 0.924, + "args": { + "External id": 296664,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368536861.008, "dur": 4.793, + "args": { + "External id": 296665,"Record function id": 0, "Ev Idx": 3288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368536862.211, "dur": 3.189, + "args": { + "External id": 296666,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368536862.947, "dur": 2.020, + "args": { + "External id": 296667,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368536863.783, "dur": 1.110, + "args": { + "External id": 296668,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368536868.913, "dur": 3.469, + "args": { + "External id": 296669,"Record function id": 0, "Ev Idx": 3292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368536869.979, "dur": 1.992, + "args": { + "External id": 296670,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368536870.419, "dur": 1.167, + "args": { + "External id": 296671,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368536870.794, "dur": 0.718, + "args": { + "External id": 296672,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368536876.440, "dur": 36851.111, + "args": { + "External id": 296673,"Record function id": 0, "Sequence number": 1209181, "Fwd thread id": 1, "Ev Idx": 3296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368536877.537, "dur": 36840.087, + "args": { + "External id": 296674,"Sequence number": 1209181, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3297 + } + }, + { + "ph": "f", "id": 51, "pid": 2070552, "tid": 2107648, "ts": 5333368536877.537, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.10)", "pid": 2070552, "tid": 2107648, + "ts": 5333368536911.496, "dur": 41.118, + "args": { + "External id": 296675,"Record function id": 0, "Ev Idx": 3298 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.10)", "pid": 2070552, "tid": 2107648, + "ts": 5333368536960.356, "dur": 72.171, + "args": { + "External id": 296676,"Record function id": 0, "Ev Idx": 3299 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.10)", "pid": 2070552, "tid": 2107648, + "ts": 5333368537038.486, "dur": 36670.402, + "args": { + "External id": 296677,"Record function id": 0, "Ev Idx": 3300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368537127.496, "dur": 6.924, + "args": { + "External id": 296678,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368537143.617, "dur": 8.360, + "args": { + "External id": 296679,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368537182.381, "dur": 35617.381, + "args": { + "External id": 296680,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368537200.482, "dur": 35588.719, + "args": { + "External id": 296681,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368537244.402, "dur": 19.919, + "args": { + "External id": 296682,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368537271.473, "dur": 35479.671, + "args": { + "External id": 296683,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 3306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368537275.790, "dur": 35474.534, + "args": { + "External id": 296684,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 3307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368537280.496, "dur": 7.766, + "args": { + "External id": 296685,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368537290.498, "dur": 35455.501, + "args": { + "External id": 296686,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 3309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368572900.189, "dur": 10.318, + "args": { + "External id": 296687,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 3310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368572903.220, "dur": 6.907, + "args": { + "External id": 296688,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368572939.130, "dur": 434.327, + "args": { + "External id": 296689,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 3312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368572964.805, "dur": 403.089, + "args": { + "External id": 296690,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3313, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368572975.996, "dur": 385.374, + "args": { + "External id": 296691,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 3314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368573397.292, "dur": 2.348, + "args": { + "External id": 296692,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3315, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368573465.731, "dur": 7.132, + "args": { + "External id": 296693,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368573518.701, "dur": 1.433, + "args": { + "External id": 296694,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368573535.013, "dur": 1.482, + "args": { + "External id": 296695,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368573548.360, "dur": 0.823, + "args": { + "External id": 296696,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368573560.125, "dur": 0.869, + "args": { + "External id": 296697,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368573570.731, "dur": 0.912, + "args": { + "External id": 296698,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368573582.534, "dur": 0.891, + "args": { + "External id": 296699,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368573594.836, "dur": 1.645, + "args": { + "External id": 296700,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368573606.939, "dur": 0.852, + "args": { + "External id": 296701,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368573744.825, "dur": 2797.626, + "args": { + "External id": 296702,"Record function id": 0, "Ev Idx": 3325 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.9)", "pid": 2070552, "tid": 2107648, + "ts": 5333368573766.678, "dur": 1061.408, + "args": { + "External id": 296703,"Record function id": 0, "Ev Idx": 3326 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.9)", "pid": 2070552, "tid": 2107648, + "ts": 5333368573783.113, "dur": 313.962, + "args": { + "External id": 296704,"Record function id": 0, "Ev Idx": 3327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368573870.651, "dur": 4.934, + "args": { + "External id": 296705,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368573879.107, "dur": 0.813, + "args": { + "External id": 296706,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368573881.914, "dur": 0.893, + "args": { + "External id": 296707,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368573884.770, "dur": 0.853, + "args": { + "External id": 296708,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368573887.449, "dur": 1.125, + "args": { + "External id": 296709,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368573890.102, "dur": 0.899, + "args": { + "External id": 296710,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368573892.695, "dur": 1.506, + "args": { + "External id": 296711,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368573895.898, "dur": 0.901, + "args": { + "External id": 296712,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368573898.525, "dur": 1.031, + "args": { + "External id": 296713,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368573901.391, "dur": 0.745, + "args": { + "External id": 296714,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368573921.451, "dur": 147.373, + "args": { + "External id": 296715,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368573938.194, "dur": 126.571, + "args": { + "External id": 296716,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368573951.531, "dur": 12.954, + "args": { + "External id": 296717,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368573968.232, "dur": 68.201, + "args": { + "External id": 296718,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 3341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368573970.598, "dur": 65.535, + "args": { + "External id": 296719,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 3342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368573974.626, "dur": 6.272, + "args": { + "External id": 296720,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368573982.680, "dur": 52.682, + "args": { + "External id": 296721,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 3344 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.8", "pid": 2070552, "tid": 2107648, + "ts": 5333368574211.609, "dur": 609.006, + "args": { + "External id": 296722,"Record function id": 0, "Ev Idx": 3345 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.8)", "pid": 2070552, "tid": 2107648, + "ts": 5333368574233.241, "dur": 574.274, + "args": { + "External id": 296723,"Record function id": 0, "Ev Idx": 3346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368574300.610, "dur": 6.571, + "args": { + "External id": 296724,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368574323.521, "dur": 28.798, + "args": { + "External id": 296725,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368574328.254, "dur": 2.440, + "args": { + "External id": 296726,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368574332.366, "dur": 0.601, + "args": { + "External id": 296727,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368574334.507, "dur": 0.774, + "args": { + "External id": 296728,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368574338.487, "dur": 0.357, + "args": { + "External id": 296729,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368574340.073, "dur": 0.748, + "args": { + "External id": 296730,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368574341.739, "dur": 0.475, + "args": { + "External id": 296731,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368574344.101, "dur": 0.741, + "args": { + "External id": 296732,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368574345.721, "dur": 0.645, + "args": { + "External id": 296733,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368574347.516, "dur": 1.639, + "args": { + "External id": 296734,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368574363.481, "dur": 35.089, + "args": { + "External id": 296735,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5333368574429.258, "dur": 98.419, + "args": { + "External id": 296736,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 3359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368574439.628, "dur": 3.245, + "args": { + "External id": 296737,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5333368574447.482, "dur": 9.937, + "args": { + "External id": 296738,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5333368574451.880, "dur": 5.151, + "args": { + "External id": 296739,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 3362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368574455.219, "dur": 0.617, + "args": { + "External id": 296740,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368574464.408, "dur": 23.755, + "args": { + "External id": 296741,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368574466.405, "dur": 0.617, + "args": { + "External id": 296742,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368574468.711, "dur": 0.740, + "args": { + "External id": 296743,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368574470.443, "dur": 1.943, + "args": { + "External id": 296744,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368574473.526, "dur": 0.626, + "args": { + "External id": 296745,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368574475.164, "dur": 0.697, + "args": { + "External id": 296746,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368574477.984, "dur": 0.702, + "args": { + "External id": 296747,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368574479.628, "dur": 0.603, + "args": { + "External id": 296748,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368574481.326, "dur": 0.545, + "args": { + "External id": 296749,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368574483.165, "dur": 0.491, + "args": { + "External id": 296750,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368574498.414, "dur": 21.347, + "args": { + "External id": 296751,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368574569.243, "dur": 164.352, + "args": { + "External id": 296752,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 3375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368574592.219, "dur": 137.562, + "args": { + "External id": 296753,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3376, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368574600.772, "dur": 124.356, + "args": { + "External id": 296754,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 3377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368574750.996, "dur": 1.954, + "args": { + "External id": 296755,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3378, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368574836.241, "dur": 1685.291, + "args": { + "External id": 296756,"Sequence number": 1209180, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3379 + } + }, + { + "ph": "f", "id": 52, "pid": 2070552, "tid": 2107648, "ts": 5333368574836.241, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368574949.400, "dur": 106.453, + "args": { + "External id": 296757,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 3380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368575094.628, "dur": 39.967, + "args": { + "External id": 296758,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 3381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5333368575151.447, "dur": 83.708, + "args": { + "External id": 296759,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 3382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368575253.164, "dur": 37.000, + "args": { + "External id": 296760,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368575297.346, "dur": 46.361, + "args": { + "External id": 296761,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368575350.617, "dur": 28.042, + "args": { + "External id": 296762,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368575386.124, "dur": 41.358, + "args": { + "External id": 296763,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368575453.509, "dur": 27.427, + "args": { + "External id": 296764,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 3387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368575500.484, "dur": 27.698, + "args": { + "External id": 296765,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368575550.058, "dur": 18.785, + "args": { + "External id": 296766,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368575582.710, "dur": 15.995, + "args": { + "External id": 296767,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368575606.795, "dur": 69.451, + "args": { + "External id": 296768,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368575681.698, "dur": 36.402, + "args": { + "External id": 296769,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5333368575753.427, "dur": 183.115, + "args": { + "External id": 296770,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368575834.143, "dur": 6.751, + "args": { + "External id": 296771,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368575842.930, "dur": 3.469, + "args": { + "External id": 296772,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368575970.079, "dur": 24.191, + "args": { + "External id": 296773,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368576005.304, "dur": 15.258, + "args": { + "External id": 296774,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368576028.211, "dur": 33.675, + "args": { + "External id": 296775,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368576067.484, "dur": 33.913, + "args": { + "External id": 296776,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368576108.284, "dur": 21.501, + "args": { + "External id": 296777,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368576135.801, "dur": 73.448, + "args": { + "External id": 296778,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368576230.897, "dur": 32.970, + "args": { + "External id": 296779,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368576271.986, "dur": 35.669, + "args": { + "External id": 296780,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5333368576327.471, "dur": 22.449, + "args": { + "External id": 296781,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 3404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368576371.293, "dur": 25.695, + "args": { + "External id": 296782,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368576413.117, "dur": 17.067, + "args": { + "External id": 296783,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368576446.988, "dur": 17.696, + "args": { + "External id": 296784,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5333368576475.993, "dur": 16.331, + "args": { + "External id": 296785,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 3408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368576565.442, "dur": 14.888, + "args": { + "External id": 296786,"Record function id": 0, "Ev Idx": 3409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368576568.784, "dur": 10.625, + "args": { + "External id": 296787,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368576572.939, "dur": 5.690, + "args": { + "External id": 296788,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368576574.095, "dur": 4.438, + "args": { + "External id": 296789,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368576584.052, "dur": 4.525, + "args": { + "External id": 296790,"Record function id": 0, "Ev Idx": 3413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368576585.398, "dur": 2.752, + "args": { + "External id": 296791,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368576585.936, "dur": 1.781, + "args": { + "External id": 296792,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368576586.476, "dur": 1.147, + "args": { + "External id": 296793,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368576591.772, "dur": 3.957, + "args": { + "External id": 296794,"Record function id": 0, "Ev Idx": 3417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368576592.949, "dur": 2.376, + "args": { + "External id": 296795,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368576593.479, "dur": 1.437, + "args": { + "External id": 296796,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368576593.918, "dur": 0.913, + "args": { + "External id": 296797,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368576598.832, "dur": 3.765, + "args": { + "External id": 296798,"Record function id": 0, "Ev Idx": 3421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368576599.775, "dur": 2.416, + "args": { + "External id": 296799,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368576600.235, "dur": 1.537, + "args": { + "External id": 296800,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368576600.557, "dur": 1.141, + "args": { + "External id": 296801,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368576605.579, "dur": 3.575, + "args": { + "External id": 296802,"Record function id": 0, "Ev Idx": 3425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368576606.586, "dur": 2.142, + "args": { + "External id": 296803,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368576607.064, "dur": 1.247, + "args": { + "External id": 296804,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368576607.429, "dur": 0.808, + "args": { + "External id": 296805,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368576612.164, "dur": 4.120, + "args": { + "External id": 296806,"Record function id": 0, "Ev Idx": 3429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368576613.489, "dur": 2.381, + "args": { + "External id": 296807,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368576613.924, "dur": 1.425, + "args": { + "External id": 296808,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368576614.624, "dur": 0.647, + "args": { + "External id": 296809,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368576656.612, "dur": 7.480, + "args": { + "External id": 296810,"Record function id": 0, "Ev Idx": 3433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368576658.843, "dur": 4.517, + "args": { + "External id": 296811,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368576660.003, "dur": 2.544, + "args": { + "External id": 296812,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368576660.754, "dur": 1.582, + "args": { + "External id": 296813,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368576667.621, "dur": 3.887, + "args": { + "External id": 296814,"Record function id": 0, "Ev Idx": 3437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368576668.634, "dur": 2.447, + "args": { + "External id": 296815,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368576669.148, "dur": 1.351, + "args": { + "External id": 296816,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368576669.545, "dur": 0.881, + "args": { + "External id": 296817,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368576674.914, "dur": 4.064, + "args": { + "External id": 296818,"Record function id": 0, "Ev Idx": 3441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368576675.935, "dur": 2.644, + "args": { + "External id": 296819,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368576676.448, "dur": 1.727, + "args": { + "External id": 296820,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368576677.196, "dur": 0.905, + "args": { + "External id": 296821,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368576683.030, "dur": 37362.116, + "args": { + "External id": 296822,"Record function id": 0, "Sequence number": 1209179, "Fwd thread id": 1, "Ev Idx": 3445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368576684.363, "dur": 37351.609, + "args": { + "External id": 296823,"Sequence number": 1209179, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3446 + } + }, + { + "ph": "f", "id": 53, "pid": 2070552, "tid": 2107648, "ts": 5333368576684.363, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.9)", "pid": 2070552, "tid": 2107648, + "ts": 5333368576714.308, "dur": 40.066, + "args": { + "External id": 296824,"Record function id": 0, "Ev Idx": 3447 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.9)", "pid": 2070552, "tid": 2107648, + "ts": 5333368576762.877, "dur": 70.137, + "args": { + "External id": 296825,"Record function id": 0, "Ev Idx": 3448 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.9)", "pid": 2070552, "tid": 2107648, + "ts": 5333368576839.554, "dur": 37189.067, + "args": { + "External id": 296826,"Record function id": 0, "Ev Idx": 3449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368576931.456, "dur": 7.505, + "args": { + "External id": 296827,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368576948.831, "dur": 4.756, + "args": { + "External id": 296828,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368576967.288, "dur": 36189.177, + "args": { + "External id": 296829,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368576980.553, "dur": 36164.547, + "args": { + "External id": 296830,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368577021.043, "dur": 14.899, + "args": { + "External id": 296831,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368577042.005, "dur": 36058.510, + "args": { + "External id": 296832,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 3455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368577046.078, "dur": 36053.569, + "args": { + "External id": 296833,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 3456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368577049.522, "dur": 5.000, + "args": { + "External id": 296834,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368577056.714, "dur": 36038.895, + "args": { + "External id": 296835,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 3458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368613282.087, "dur": 10.612, + "args": { + "External id": 296836,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 3459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368613285.202, "dur": 6.998, + "args": { + "External id": 296837,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368613321.300, "dur": 411.126, + "args": { + "External id": 296838,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 3461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368613346.783, "dur": 379.478, + "args": { + "External id": 296839,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3462, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368613360.032, "dur": 360.029, + "args": { + "External id": 296840,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 3463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368613755.343, "dur": 2.370, + "args": { + "External id": 296841,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3464, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368613824.128, "dur": 7.073, + "args": { + "External id": 296842,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368613876.518, "dur": 1.445, + "args": { + "External id": 296843,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368613892.876, "dur": 1.306, + "args": { + "External id": 296844,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368613905.384, "dur": 0.938, + "args": { + "External id": 296845,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368613917.375, "dur": 1.097, + "args": { + "External id": 296846,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368613929.349, "dur": 0.950, + "args": { + "External id": 296847,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368613942.840, "dur": 1.063, + "args": { + "External id": 296848,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368613954.866, "dur": 1.727, + "args": { + "External id": 296849,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368613966.679, "dur": 0.794, + "args": { + "External id": 296850,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368614059.566, "dur": 2905.320, + "args": { + "External id": 296851,"Record function id": 0, "Ev Idx": 3474 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.8)", "pid": 2070552, "tid": 2107648, + "ts": 5333368614079.251, "dur": 1075.972, + "args": { + "External id": 296852,"Record function id": 0, "Ev Idx": 3475 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.8)", "pid": 2070552, "tid": 2107648, + "ts": 5333368614093.629, "dur": 358.694, + "args": { + "External id": 296853,"Record function id": 0, "Ev Idx": 3476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368614204.077, "dur": 5.389, + "args": { + "External id": 296854,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368614214.426, "dur": 1.242, + "args": { + "External id": 296855,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368614218.577, "dur": 1.477, + "args": { + "External id": 296856,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368614222.316, "dur": 1.086, + "args": { + "External id": 296857,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368614226.336, "dur": 1.203, + "args": { + "External id": 296858,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368614230.554, "dur": 2.061, + "args": { + "External id": 296859,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368614235.350, "dur": 1.637, + "args": { + "External id": 296860,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368614238.712, "dur": 1.175, + "args": { + "External id": 296861,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368614241.722, "dur": 1.406, + "args": { + "External id": 296862,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368614245.137, "dur": 1.188, + "args": { + "External id": 296863,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368614266.585, "dur": 154.570, + "args": { + "External id": 296864,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368614282.882, "dur": 133.523, + "args": { + "External id": 296865,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368614298.436, "dur": 14.030, + "args": { + "External id": 296866,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368614316.816, "dur": 69.892, + "args": { + "External id": 296867,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 3490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368614319.204, "dur": 67.242, + "args": { + "External id": 296868,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 3491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368614323.022, "dur": 6.266, + "args": { + "External id": 296869,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368614330.938, "dur": 55.053, + "args": { + "External id": 296870,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 3493 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.7", "pid": 2070552, "tid": 2107648, + "ts": 5333368614546.907, "dur": 600.581, + "args": { + "External id": 296871,"Record function id": 0, "Ev Idx": 3494 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.7)", "pid": 2070552, "tid": 2107648, + "ts": 5333368614565.485, "dur": 566.126, + "args": { + "External id": 296872,"Record function id": 0, "Ev Idx": 3495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368614670.210, "dur": 6.769, + "args": { + "External id": 296873,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368614693.924, "dur": 34.754, + "args": { + "External id": 296874,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368614699.450, "dur": 3.605, + "args": { + "External id": 296875,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368614704.752, "dur": 0.528, + "args": { + "External id": 296876,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368614706.829, "dur": 0.614, + "args": { + "External id": 296877,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368614710.595, "dur": 0.895, + "args": { + "External id": 296878,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368614712.531, "dur": 0.774, + "args": { + "External id": 296879,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368614714.658, "dur": 0.556, + "args": { + "External id": 296880,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368614717.083, "dur": 0.902, + "args": { + "External id": 296881,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368614719.541, "dur": 0.626, + "args": { + "External id": 296882,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368614721.368, "dur": 1.727, + "args": { + "External id": 296883,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368614739.581, "dur": 37.758, + "args": { + "External id": 296884,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5333368614811.091, "dur": 103.309, + "args": { + "External id": 296885,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 3508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368614820.776, "dur": 4.410, + "args": { + "External id": 296886,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5333368614830.002, "dur": 10.149, + "args": { + "External id": 296887,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5333368614834.074, "dur": 5.649, + "args": { + "External id": 296888,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 3511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368614837.228, "dur": 0.863, + "args": { + "External id": 296889,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368614846.544, "dur": 28.490, + "args": { + "External id": 296890,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368614848.373, "dur": 0.354, + "args": { + "External id": 296891,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368614850.624, "dur": 1.098, + "args": { + "External id": 296892,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368614853.157, "dur": 0.624, + "args": { + "External id": 296893,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368614855.261, "dur": 0.686, + "args": { + "External id": 296894,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368614858.173, "dur": 0.880, + "args": { + "External id": 296895,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368614860.223, "dur": 0.667, + "args": { + "External id": 296896,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368614862.300, "dur": 0.969, + "args": { + "External id": 296897,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368614864.775, "dur": 0.554, + "args": { + "External id": 296898,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368614866.670, "dur": 0.727, + "args": { + "External id": 296899,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368614885.533, "dur": 20.436, + "args": { + "External id": 296900,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368614959.861, "dur": 109.996, + "args": { + "External id": 296901,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 3524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368614981.721, "dur": 84.759, + "args": { + "External id": 296902,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3525, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368614990.764, "dur": 71.498, + "args": { + "External id": 296903,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 3526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368615083.172, "dur": 1.567, + "args": { + "External id": 296904,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3527, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368615162.521, "dur": 1779.136, + "args": { + "External id": 296905,"Sequence number": 1209178, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3528 + } + }, + { + "ph": "f", "id": 54, "pid": 2070552, "tid": 2107648, "ts": 5333368615162.521, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368615321.380, "dur": 114.613, + "args": { + "External id": 296906,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 3529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368615479.388, "dur": 44.875, + "args": { + "External id": 296907,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 3530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5333368615541.818, "dur": 50.852, + "args": { + "External id": 296908,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 3531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368615602.836, "dur": 75.682, + "args": { + "External id": 296909,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368615689.932, "dur": 55.355, + "args": { + "External id": 296910,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368615752.401, "dur": 30.106, + "args": { + "External id": 296911,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368615791.020, "dur": 42.051, + "args": { + "External id": 296912,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368615857.414, "dur": 26.328, + "args": { + "External id": 296913,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 3536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368615904.079, "dur": 31.076, + "args": { + "External id": 296914,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368615958.446, "dur": 19.885, + "args": { + "External id": 296915,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368615991.601, "dur": 18.009, + "args": { + "External id": 296916,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368616018.285, "dur": 28.620, + "args": { + "External id": 296917,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368616049.778, "dur": 32.107, + "args": { + "External id": 296918,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5333368616112.953, "dur": 207.731, + "args": { + "External id": 296919,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368616213.992, "dur": 9.167, + "args": { + "External id": 296920,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368616225.610, "dur": 4.066, + "args": { + "External id": 296921,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368616356.721, "dur": 27.421, + "args": { + "External id": 296922,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368616396.449, "dur": 15.840, + "args": { + "External id": 296923,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368616421.880, "dur": 46.487, + "args": { + "External id": 296924,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368616474.641, "dur": 40.056, + "args": { + "External id": 296925,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368616521.221, "dur": 38.762, + "args": { + "External id": 296926,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368616570.416, "dur": 38.811, + "args": { + "External id": 296927,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368616616.324, "dur": 61.935, + "args": { + "External id": 296928,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368616688.886, "dur": 32.324, + "args": { + "External id": 296929,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5333368616744.982, "dur": 24.422, + "args": { + "External id": 296930,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 3553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368616786.993, "dur": 29.838, + "args": { + "External id": 296931,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368616831.690, "dur": 17.557, + "args": { + "External id": 296932,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368616867.177, "dur": 15.583, + "args": { + "External id": 296933,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5333368616895.805, "dur": 16.613, + "args": { + "External id": 296934,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 3557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368616987.844, "dur": 16.509, + "args": { + "External id": 296935,"Record function id": 0, "Ev Idx": 3558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368616991.257, "dur": 12.122, + "args": { + "External id": 296936,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368616995.564, "dur": 6.968, + "args": { + "External id": 296937,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368616996.859, "dur": 5.580, + "args": { + "External id": 296938,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368617008.158, "dur": 4.448, + "args": { + "External id": 296939,"Record function id": 0, "Ev Idx": 3562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368617009.412, "dur": 2.732, + "args": { + "External id": 296940,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368617009.972, "dur": 1.596, + "args": { + "External id": 296941,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368617010.332, "dur": 1.162, + "args": { + "External id": 296942,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368617015.798, "dur": 3.753, + "args": { + "External id": 296943,"Record function id": 0, "Ev Idx": 3566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368617016.994, "dur": 2.143, + "args": { + "External id": 296944,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368617017.505, "dur": 1.230, + "args": { + "External id": 296945,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368617017.800, "dur": 0.847, + "args": { + "External id": 296946,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368617022.671, "dur": 3.727, + "args": { + "External id": 296947,"Record function id": 0, "Ev Idx": 3570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368617023.657, "dur": 2.315, + "args": { + "External id": 296948,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368617024.186, "dur": 1.373, + "args": { + "External id": 296949,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368617024.551, "dur": 0.911, + "args": { + "External id": 296950,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368617029.460, "dur": 4.148, + "args": { + "External id": 296951,"Record function id": 0, "Ev Idx": 3574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368617030.585, "dur": 2.510, + "args": { + "External id": 296952,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368617031.284, "dur": 1.213, + "args": { + "External id": 296953,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368617031.586, "dur": 0.823, + "args": { + "External id": 296954,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368617036.740, "dur": 4.141, + "args": { + "External id": 296955,"Record function id": 0, "Ev Idx": 3578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368617037.923, "dur": 2.548, + "args": { + "External id": 296956,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368617038.415, "dur": 1.401, + "args": { + "External id": 296957,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368617038.992, "dur": 0.753, + "args": { + "External id": 296958,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368617044.036, "dur": 13.409, + "args": { + "External id": 296959,"Record function id": 0, "Ev Idx": 3582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368617044.999, "dur": 12.004, + "args": { + "External id": 296960,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368617055.409, "dur": 1.177, + "args": { + "External id": 296961,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368617055.766, "dur": 0.736, + "args": { + "External id": 296962,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368617060.902, "dur": 3.812, + "args": { + "External id": 296963,"Record function id": 0, "Ev Idx": 3586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368617061.891, "dur": 2.379, + "args": { + "External id": 296964,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368617062.342, "dur": 1.304, + "args": { + "External id": 296965,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368617062.617, "dur": 0.963, + "args": { + "External id": 296966,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368617067.929, "dur": 3.716, + "args": { + "External id": 296967,"Record function id": 0, "Ev Idx": 3590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368617068.943, "dur": 2.285, + "args": { + "External id": 296968,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368617069.416, "dur": 1.127, + "args": { + "External id": 296969,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368617069.731, "dur": 0.738, + "args": { + "External id": 296970,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368617075.320, "dur": 36453.879, + "args": { + "External id": 296971,"Record function id": 0, "Sequence number": 1209177, "Fwd thread id": 1, "Ev Idx": 3594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368617076.904, "dur": 36442.820, + "args": { + "External id": 296972,"Sequence number": 1209177, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3595 + } + }, + { + "ph": "f", "id": 55, "pid": 2070552, "tid": 2107648, "ts": 5333368617076.904, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.8)", "pid": 2070552, "tid": 2107648, + "ts": 5333368617109.537, "dur": 42.719, + "args": { + "External id": 296973,"Record function id": 0, "Ev Idx": 3596 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.8)", "pid": 2070552, "tid": 2107648, + "ts": 5333368617159.902, "dur": 112.597, + "args": { + "External id": 296974,"Record function id": 0, "Ev Idx": 3597 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.8)", "pid": 2070552, "tid": 2107648, + "ts": 5333368617282.927, "dur": 36229.264, + "args": { + "External id": 296975,"Record function id": 0, "Ev Idx": 3598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368617387.838, "dur": 8.780, + "args": { + "External id": 296976,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368617407.209, "dur": 5.493, + "args": { + "External id": 296977,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368617427.164, "dur": 35259.053, + "args": { + "External id": 296978,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368617441.783, "dur": 35233.308, + "args": { + "External id": 296979,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368617489.825, "dur": 18.039, + "args": { + "External id": 296980,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368617514.088, "dur": 35096.788, + "args": { + "External id": 296981,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 3604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368617516.722, "dur": 35093.360, + "args": { + "External id": 296982,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 3605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368617521.211, "dur": 5.955, + "args": { + "External id": 296983,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368617528.808, "dur": 35076.736, + "args": { + "External id": 296984,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 3607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368652787.017, "dur": 11.262, + "args": { + "External id": 296985,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 3608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368652790.281, "dur": 7.679, + "args": { + "External id": 296986,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368652827.785, "dur": 376.838, + "args": { + "External id": 296987,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 3610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368652853.866, "dur": 345.254, + "args": { + "External id": 296988,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3611, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368652864.476, "dur": 327.032, + "args": { + "External id": 296989,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 3612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368653230.012, "dur": 3.169, + "args": { + "External id": 296990,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3613, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368653307.168, "dur": 6.831, + "args": { + "External id": 296991,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368653357.686, "dur": 1.312, + "args": { + "External id": 296992,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368653373.172, "dur": 1.193, + "args": { + "External id": 296993,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368653385.540, "dur": 0.924, + "args": { + "External id": 296994,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368653400.600, "dur": 0.988, + "args": { + "External id": 296995,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368653412.083, "dur": 1.426, + "args": { + "External id": 296996,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368653424.288, "dur": 1.103, + "args": { + "External id": 296997,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368653436.197, "dur": 1.226, + "args": { + "External id": 296998,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368653447.100, "dur": 1.091, + "args": { + "External id": 296999,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368653544.122, "dur": 2865.178, + "args": { + "External id": 297000,"Record function id": 0, "Ev Idx": 3623 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.7)", "pid": 2070552, "tid": 2107648, + "ts": 5333368653564.842, "dur": 1105.138, + "args": { + "External id": 297001,"Record function id": 0, "Ev Idx": 3624 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.7)", "pid": 2070552, "tid": 2107648, + "ts": 5333368653578.255, "dur": 357.396, + "args": { + "External id": 297002,"Record function id": 0, "Ev Idx": 3625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368653700.706, "dur": 4.976, + "args": { + "External id": 297003,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368653709.480, "dur": 1.395, + "args": { + "External id": 297004,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368653712.589, "dur": 2.099, + "args": { + "External id": 297005,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368653716.094, "dur": 1.535, + "args": { + "External id": 297006,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368653719.540, "dur": 1.889, + "args": { + "External id": 297007,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368653722.837, "dur": 1.809, + "args": { + "External id": 297008,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368653725.916, "dur": 1.773, + "args": { + "External id": 297009,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368653729.449, "dur": 1.663, + "args": { + "External id": 297010,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368653732.552, "dur": 1.625, + "args": { + "External id": 297011,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368653735.541, "dur": 1.334, + "args": { + "External id": 297012,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368653755.261, "dur": 150.763, + "args": { + "External id": 297013,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368653771.619, "dur": 130.037, + "args": { + "External id": 297014,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368653785.419, "dur": 12.588, + "args": { + "External id": 297015,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368653801.735, "dur": 71.187, + "args": { + "External id": 297016,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 3639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368653804.013, "dur": 68.654, + "args": { + "External id": 297017,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 3640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368653808.072, "dur": 6.752, + "args": { + "External id": 297018,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368653816.346, "dur": 55.585, + "args": { + "External id": 297019,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 3642 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.6", "pid": 2070552, "tid": 2107648, + "ts": 5333368654026.442, "dur": 591.681, + "args": { + "External id": 297020,"Record function id": 0, "Ev Idx": 3643 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.6)", "pid": 2070552, "tid": 2107648, + "ts": 5333368654043.992, "dur": 562.646, + "args": { + "External id": 297021,"Record function id": 0, "Ev Idx": 3644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368654105.828, "dur": 5.411, + "args": { + "External id": 297022,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368654127.385, "dur": 32.779, + "args": { + "External id": 297023,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368654131.806, "dur": 1.928, + "args": { + "External id": 297024,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368654135.432, "dur": 0.983, + "args": { + "External id": 297025,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368654138.121, "dur": 0.765, + "args": { + "External id": 297026,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368654140.450, "dur": 1.098, + "args": { + "External id": 297027,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368654143.030, "dur": 0.698, + "args": { + "External id": 297028,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368654145.353, "dur": 0.674, + "args": { + "External id": 297029,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368654147.559, "dur": 0.943, + "args": { + "External id": 297030,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368654150.288, "dur": 0.666, + "args": { + "External id": 297031,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368654152.562, "dur": 0.805, + "args": { + "External id": 297032,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368654192.613, "dur": 41.824, + "args": { + "External id": 297033,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5333368654274.148, "dur": 106.622, + "args": { + "External id": 297034,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 3657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368654285.079, "dur": 4.890, + "args": { + "External id": 297035,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5333368654294.629, "dur": 12.125, + "args": { + "External id": 297036,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5333368654299.135, "dur": 7.153, + "args": { + "External id": 297037,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 3660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368654303.347, "dur": 1.013, + "args": { + "External id": 297038,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368654314.383, "dur": 27.935, + "args": { + "External id": 297039,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368654316.275, "dur": 0.966, + "args": { + "External id": 297040,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368654319.116, "dur": 1.143, + "args": { + "External id": 297041,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368654321.603, "dur": 0.882, + "args": { + "External id": 297042,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368654324.074, "dur": 1.124, + "args": { + "External id": 297043,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368654326.357, "dur": 0.719, + "args": { + "External id": 297044,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368654328.489, "dur": 0.699, + "args": { + "External id": 297045,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368654331.193, "dur": 0.988, + "args": { + "External id": 297046,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368654333.318, "dur": 0.804, + "args": { + "External id": 297047,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368654335.690, "dur": 0.625, + "args": { + "External id": 297048,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368654352.731, "dur": 19.085, + "args": { + "External id": 297049,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368654428.421, "dur": 113.449, + "args": { + "External id": 297050,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 3673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368654451.067, "dur": 87.513, + "args": { + "External id": 297051,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3674, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368654460.827, "dur": 73.363, + "args": { + "External id": 297052,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 3675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368654556.451, "dur": 2.099, + "args": { + "External id": 297053,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3676, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368654678.446, "dur": 1707.137, + "args": { + "External id": 297054,"Sequence number": 1209176, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3677 + } + }, + { + "ph": "f", "id": 56, "pid": 2070552, "tid": 2107648, "ts": 5333368654678.446, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368654793.415, "dur": 110.882, + "args": { + "External id": 297055,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 3678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368654943.611, "dur": 40.548, + "args": { + "External id": 297056,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 3679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5333368655000.932, "dur": 50.031, + "args": { + "External id": 297057,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 3680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368655060.749, "dur": 32.251, + "args": { + "External id": 297058,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368655099.474, "dur": 46.392, + "args": { + "External id": 297059,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368655152.789, "dur": 51.316, + "args": { + "External id": 297060,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368655218.572, "dur": 56.920, + "args": { + "External id": 297061,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368655306.311, "dur": 27.297, + "args": { + "External id": 297062,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 3685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368655355.601, "dur": 30.415, + "args": { + "External id": 297063,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368655410.709, "dur": 19.056, + "args": { + "External id": 297064,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368655443.144, "dur": 15.871, + "args": { + "External id": 297065,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368655467.352, "dur": 30.133, + "args": { + "External id": 297066,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368655500.436, "dur": 32.151, + "args": { + "External id": 297067,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5333368655560.566, "dur": 218.386, + "args": { + "External id": 297068,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368655676.892, "dur": 7.961, + "args": { + "External id": 297069,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368655686.866, "dur": 3.200, + "args": { + "External id": 297070,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368655812.431, "dur": 25.848, + "args": { + "External id": 297071,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368655850.933, "dur": 15.702, + "args": { + "External id": 297072,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368655874.813, "dur": 43.338, + "args": { + "External id": 297073,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368655924.007, "dur": 55.077, + "args": { + "External id": 297074,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368655996.388, "dur": 28.154, + "args": { + "External id": 297075,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368656029.349, "dur": 32.357, + "args": { + "External id": 297076,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368656070.686, "dur": 24.067, + "args": { + "External id": 297077,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368656101.413, "dur": 29.079, + "args": { + "External id": 297078,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5333368656150.956, "dur": 45.603, + "args": { + "External id": 297079,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 3702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368656223.500, "dur": 34.752, + "args": { + "External id": 297080,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368656276.421, "dur": 17.947, + "args": { + "External id": 297081,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368656310.294, "dur": 15.557, + "args": { + "External id": 297082,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5333368656338.937, "dur": 17.011, + "args": { + "External id": 297083,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 3706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368656432.380, "dur": 16.203, + "args": { + "External id": 297084,"Record function id": 0, "Ev Idx": 3707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368656435.361, "dur": 12.196, + "args": { + "External id": 297085,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368656439.943, "dur": 6.767, + "args": { + "External id": 297086,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368656441.463, "dur": 5.158, + "args": { + "External id": 297087,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368656452.354, "dur": 4.452, + "args": { + "External id": 297088,"Record function id": 0, "Ev Idx": 3711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368656453.603, "dur": 2.743, + "args": { + "External id": 297089,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368656454.582, "dur": 1.234, + "args": { + "External id": 297090,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368656454.891, "dur": 0.854, + "args": { + "External id": 297091,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368656459.963, "dur": 3.912, + "args": { + "External id": 297092,"Record function id": 0, "Ev Idx": 3715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368656461.089, "dur": 2.300, + "args": { + "External id": 297093,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368656461.625, "dur": 1.242, + "args": { + "External id": 297094,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368656461.927, "dur": 0.851, + "args": { + "External id": 297095,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368656467.041, "dur": 4.056, + "args": { + "External id": 297096,"Record function id": 0, "Ev Idx": 3719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368656468.147, "dur": 2.502, + "args": { + "External id": 297097,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368656468.698, "dur": 1.541, + "args": { + "External id": 297098,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368656469.085, "dur": 1.056, + "args": { + "External id": 297099,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368656474.217, "dur": 4.045, + "args": { + "External id": 297100,"Record function id": 0, "Ev Idx": 3723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368656475.258, "dur": 2.572, + "args": { + "External id": 297101,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368656475.932, "dur": 1.455, + "args": { + "External id": 297102,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368656476.216, "dur": 1.086, + "args": { + "External id": 297103,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368656481.321, "dur": 3.601, + "args": { + "External id": 297104,"Record function id": 0, "Ev Idx": 3727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368656482.235, "dur": 2.261, + "args": { + "External id": 297105,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368656482.802, "dur": 1.261, + "args": { + "External id": 297106,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368656483.194, "dur": 0.781, + "args": { + "External id": 297107,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368656487.999, "dur": 4.641, + "args": { + "External id": 297108,"Record function id": 0, "Ev Idx": 3731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368656489.467, "dur": 2.756, + "args": { + "External id": 297109,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368656490.476, "dur": 1.113, + "args": { + "External id": 297110,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368656490.740, "dur": 0.775, + "args": { + "External id": 297111,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368656495.935, "dur": 3.804, + "args": { + "External id": 297112,"Record function id": 0, "Ev Idx": 3735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368656496.846, "dur": 2.471, + "args": { + "External id": 297113,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368656497.620, "dur": 1.285, + "args": { + "External id": 297114,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368656497.890, "dur": 0.941, + "args": { + "External id": 297115,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368656503.074, "dur": 4.540, + "args": { + "External id": 297116,"Record function id": 0, "Ev Idx": 3739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368656504.196, "dur": 3.003, + "args": { + "External id": 297117,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368656505.355, "dur": 1.291, + "args": { + "External id": 297118,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368656505.732, "dur": 0.844, + "args": { + "External id": 297119,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368656511.588, "dur": 38761.406, + "args": { + "External id": 297120,"Record function id": 0, "Sequence number": 1209175, "Fwd thread id": 1, "Ev Idx": 3743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368656512.874, "dur": 38749.018, + "args": { + "External id": 297121,"Sequence number": 1209175, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3744 + } + }, + { + "ph": "f", "id": 57, "pid": 2070552, "tid": 2107648, "ts": 5333368656512.874, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.7)", "pid": 2070552, "tid": 2107648, + "ts": 5333368656541.944, "dur": 38.560, + "args": { + "External id": 297122,"Record function id": 0, "Ev Idx": 3745 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.7)", "pid": 2070552, "tid": 2107648, + "ts": 5333368656588.506, "dur": 117.037, + "args": { + "External id": 297123,"Record function id": 0, "Ev Idx": 3746 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.7)", "pid": 2070552, "tid": 2107648, + "ts": 5333368656714.228, "dur": 38536.681, + "args": { + "External id": 297124,"Record function id": 0, "Ev Idx": 3747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368656807.581, "dur": 7.358, + "args": { + "External id": 297125,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368656824.758, "dur": 5.264, + "args": { + "External id": 297126,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368656844.312, "dur": 37518.103, + "args": { + "External id": 297127,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368656858.934, "dur": 37493.108, + "args": { + "External id": 297128,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368656901.313, "dur": 14.144, + "args": { + "External id": 297129,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368656921.529, "dur": 37392.542, + "args": { + "External id": 297130,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 3753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368656924.043, "dur": 37389.048, + "args": { + "External id": 297131,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 3754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368656927.956, "dur": 6.007, + "args": { + "External id": 297132,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368656936.025, "dur": 37372.575, + "args": { + "External id": 297133,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 3756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368694460.755, "dur": 9.727, + "args": { + "External id": 297134,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 3757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368694463.668, "dur": 6.442, + "args": { + "External id": 297135,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368694499.885, "dur": 426.259, + "args": { + "External id": 297136,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 3759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368694525.726, "dur": 394.701, + "args": { + "External id": 297137,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3760, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368694536.324, "dur": 378.322, + "args": { + "External id": 297138,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 3761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368694948.037, "dur": 1.990, + "args": { + "External id": 297139,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3762, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368695014.336, "dur": 7.542, + "args": { + "External id": 297140,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368695067.088, "dur": 1.930, + "args": { + "External id": 297141,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368695084.175, "dur": 1.656, + "args": { + "External id": 297142,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368695097.187, "dur": 1.215, + "args": { + "External id": 297143,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368695110.469, "dur": 1.409, + "args": { + "External id": 297144,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368695121.713, "dur": 1.244, + "args": { + "External id": 297145,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368695133.836, "dur": 1.191, + "args": { + "External id": 297146,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368695145.641, "dur": 1.148, + "args": { + "External id": 297147,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368695156.022, "dur": 1.084, + "args": { + "External id": 297148,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368695294.619, "dur": 2860.495, + "args": { + "External id": 297149,"Record function id": 0, "Ev Idx": 3772 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.6)", "pid": 2070552, "tid": 2107648, + "ts": 5333368695317.083, "dur": 1070.334, + "args": { + "External id": 297150,"Record function id": 0, "Ev Idx": 3773 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.6)", "pid": 2070552, "tid": 2107648, + "ts": 5333368695332.037, "dur": 370.254, + "args": { + "External id": 297151,"Record function id": 0, "Ev Idx": 3774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368695426.824, "dur": 5.064, + "args": { + "External id": 297152,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368695435.762, "dur": 1.041, + "args": { + "External id": 297153,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368695438.578, "dur": 1.008, + "args": { + "External id": 297154,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368695441.219, "dur": 1.063, + "args": { + "External id": 297155,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368695443.788, "dur": 1.415, + "args": { + "External id": 297156,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368695447.202, "dur": 1.674, + "args": { + "External id": 297157,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368695450.343, "dur": 0.929, + "args": { + "External id": 297158,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368695452.526, "dur": 0.877, + "args": { + "External id": 297159,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368695455.331, "dur": 1.041, + "args": { + "External id": 297160,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368695457.672, "dur": 1.167, + "args": { + "External id": 297161,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368695477.476, "dur": 190.185, + "args": { + "External id": 297162,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368695493.767, "dur": 168.148, + "args": { + "External id": 297163,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368695506.586, "dur": 13.112, + "args": { + "External id": 297164,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368695523.593, "dur": 70.508, + "args": { + "External id": 297165,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 3788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368695525.825, "dur": 67.945, + "args": { + "External id": 297166,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 3789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368695530.096, "dur": 6.207, + "args": { + "External id": 297167,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368695538.051, "dur": 55.162, + "args": { + "External id": 297168,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 3791 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.5", "pid": 2070552, "tid": 2107648, + "ts": 5333368695795.873, "dur": 583.925, + "args": { + "External id": 297169,"Record function id": 0, "Ev Idx": 3792 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.5)", "pid": 2070552, "tid": 2107648, + "ts": 5333368695813.352, "dur": 553.457, + "args": { + "External id": 297170,"Record function id": 0, "Ev Idx": 3793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368695878.859, "dur": 6.644, + "args": { + "External id": 297171,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368695901.248, "dur": 29.427, + "args": { + "External id": 297172,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368695906.468, "dur": 1.619, + "args": { + "External id": 297173,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368695910.844, "dur": 0.982, + "args": { + "External id": 297174,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368695913.373, "dur": 0.735, + "args": { + "External id": 297175,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368695915.328, "dur": 0.502, + "args": { + "External id": 297176,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368695917.122, "dur": 0.247, + "args": { + "External id": 297177,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368695919.152, "dur": 0.669, + "args": { + "External id": 297178,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368695921.253, "dur": 0.612, + "args": { + "External id": 297179,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368695923.568, "dur": 0.685, + "args": { + "External id": 297180,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368695925.382, "dur": 0.700, + "args": { + "External id": 297181,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368695941.362, "dur": 33.449, + "args": { + "External id": 297182,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5333368696004.632, "dur": 98.635, + "args": { + "External id": 297183,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 3806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368696014.446, "dur": 3.288, + "args": { + "External id": 297184,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5333368696022.814, "dur": 10.469, + "args": { + "External id": 297185,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5333368696026.944, "dur": 5.894, + "args": { + "External id": 297186,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 3809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368696030.571, "dur": 0.877, + "args": { + "External id": 297187,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368696040.165, "dur": 27.090, + "args": { + "External id": 297188,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368696042.253, "dur": 0.700, + "args": { + "External id": 297189,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368696044.621, "dur": 0.696, + "args": { + "External id": 297190,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368696047.737, "dur": 0.412, + "args": { + "External id": 297191,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368696049.714, "dur": 0.709, + "args": { + "External id": 297192,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368696051.403, "dur": 0.714, + "args": { + "External id": 297193,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368696053.714, "dur": 0.838, + "args": { + "External id": 297194,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368696056.088, "dur": 0.988, + "args": { + "External id": 297195,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368696058.396, "dur": 0.728, + "args": { + "External id": 297196,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368696060.322, "dur": 0.869, + "args": { + "External id": 297197,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368696076.806, "dur": 19.308, + "args": { + "External id": 297198,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368696144.283, "dur": 147.852, + "args": { + "External id": 297199,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 3822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368696186.493, "dur": 101.846, + "args": { + "External id": 297200,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3823, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368696199.181, "dur": 83.657, + "args": { + "External id": 297201,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 3824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368696309.126, "dur": 1.692, + "args": { + "External id": 297202,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3825, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368696395.544, "dur": 1737.360, + "args": { + "External id": 297203,"Sequence number": 1209174, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3826 + } + }, + { + "ph": "f", "id": 58, "pid": 2070552, "tid": 2107648, "ts": 5333368696395.544, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368696510.177, "dur": 150.946, + "args": { + "External id": 297204,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 3827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368696707.136, "dur": 43.468, + "args": { + "External id": 297205,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 3828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5333368696768.729, "dur": 57.597, + "args": { + "External id": 297206,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 3829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368696836.876, "dur": 33.459, + "args": { + "External id": 297207,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368696876.493, "dur": 52.220, + "args": { + "External id": 297208,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368696935.626, "dur": 27.553, + "args": { + "External id": 297209,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368696970.996, "dur": 42.163, + "args": { + "External id": 297210,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368697033.902, "dur": 24.891, + "args": { + "External id": 297211,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 3834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368697077.358, "dur": 29.962, + "args": { + "External id": 297212,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368697127.060, "dur": 18.293, + "args": { + "External id": 297213,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368697159.162, "dur": 38.432, + "args": { + "External id": 297214,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368697218.358, "dur": 42.050, + "args": { + "External id": 297215,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368697268.451, "dur": 43.238, + "args": { + "External id": 297216,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5333368697342.005, "dur": 173.204, + "args": { + "External id": 297217,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368697421.105, "dur": 6.569, + "args": { + "External id": 297218,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368697429.369, "dur": 3.603, + "args": { + "External id": 297219,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368697553.948, "dur": 26.112, + "args": { + "External id": 297220,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368697595.972, "dur": 15.389, + "args": { + "External id": 297221,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368697619.550, "dur": 84.062, + "args": { + "External id": 297222,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368697713.374, "dur": 55.263, + "args": { + "External id": 297223,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368697783.339, "dur": 24.923, + "args": { + "External id": 297224,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368697813.561, "dur": 32.015, + "args": { + "External id": 297225,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368697854.751, "dur": 20.376, + "args": { + "External id": 297226,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368697883.136, "dur": 31.535, + "args": { + "External id": 297227,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5333368697935.119, "dur": 24.341, + "args": { + "External id": 297228,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 3851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368697982.365, "dur": 24.598, + "args": { + "External id": 297229,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368698023.080, "dur": 20.366, + "args": { + "External id": 297230,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368698059.399, "dur": 14.874, + "args": { + "External id": 297231,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5333368698086.149, "dur": 16.571, + "args": { + "External id": 297232,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 3855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368698199.334, "dur": 21.161, + "args": { + "External id": 297233,"Record function id": 0, "Ev Idx": 3856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368698204.415, "dur": 14.455, + "args": { + "External id": 297234,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368698209.451, "dur": 7.732, + "args": { + "External id": 297235,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368698211.014, "dur": 5.803, + "args": { + "External id": 297236,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368698226.568, "dur": 6.886, + "args": { + "External id": 297237,"Record function id": 0, "Ev Idx": 3860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368698228.529, "dur": 4.268, + "args": { + "External id": 297238,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368698230.008, "dur": 2.132, + "args": { + "External id": 297239,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368698230.549, "dur": 1.395, + "args": { + "External id": 297240,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368698238.008, "dur": 6.244, + "args": { + "External id": 297241,"Record function id": 0, "Ev Idx": 3864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368698240.008, "dur": 3.594, + "args": { + "External id": 297242,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368698240.999, "dur": 1.955, + "args": { + "External id": 297243,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368698241.755, "dur": 1.076, + "args": { + "External id": 297244,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 3867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368698248.574, "dur": 7.054, + "args": { + "External id": 297245,"Record function id": 0, "Ev Idx": 3868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368698249.731, "dur": 5.335, + "args": { + "External id": 297246,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368698250.371, "dur": 4.253, + "args": { + "External id": 297247,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368698253.360, "dur": 1.167, + "args": { + "External id": 297248,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 3871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368698258.646, "dur": 7.355, + "args": { + "External id": 297249,"Record function id": 0, "Ev Idx": 3872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368698262.971, "dur": 2.595, + "args": { + "External id": 297250,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368698263.599, "dur": 1.547, + "args": { + "External id": 297251,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368698264.109, "dur": 0.960, + "args": { + "External id": 297252,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368698269.062, "dur": 4.564, + "args": { + "External id": 297253,"Record function id": 0, "Ev Idx": 3876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368698270.203, "dur": 2.996, + "args": { + "External id": 297254,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368698270.852, "dur": 1.772, + "args": { + "External id": 297255,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368698271.181, "dur": 1.337, + "args": { + "External id": 297256,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368698276.590, "dur": 4.032, + "args": { + "External id": 297257,"Record function id": 0, "Ev Idx": 3880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368698277.828, "dur": 2.371, + "args": { + "External id": 297258,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368698278.530, "dur": 1.276, + "args": { + "External id": 297259,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368698278.848, "dur": 0.882, + "args": { + "External id": 297260,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368698283.552, "dur": 4.323, + "args": { + "External id": 297261,"Record function id": 0, "Ev Idx": 3884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368698284.984, "dur": 2.451, + "args": { + "External id": 297262,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368698285.502, "dur": 1.399, + "args": { + "External id": 297263,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368698285.772, "dur": 1.054, + "args": { + "External id": 297264,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 3887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368698290.876, "dur": 3.976, + "args": { + "External id": 297265,"Record function id": 0, "Ev Idx": 3888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368698292.293, "dur": 2.145, + "args": { + "External id": 297266,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368698292.742, "dur": 1.079, + "args": { + "External id": 297267,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368698292.998, "dur": 0.744, + "args": { + "External id": 297268,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 3891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368698298.666, "dur": 35948.898, + "args": { + "External id": 297269,"Record function id": 0, "Sequence number": 1209173, "Fwd thread id": 1, "Ev Idx": 3892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368698299.892, "dur": 35937.644, + "args": { + "External id": 297270,"Sequence number": 1209173, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3893 + } + }, + { + "ph": "f", "id": 59, "pid": 2070552, "tid": 2107648, "ts": 5333368698299.892, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.6)", "pid": 2070552, "tid": 2107648, + "ts": 5333368698334.001, "dur": 40.862, + "args": { + "External id": 297271,"Record function id": 0, "Ev Idx": 3894 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.6)", "pid": 2070552, "tid": 2107648, + "ts": 5333368698386.132, "dur": 74.059, + "args": { + "External id": 297272,"Record function id": 0, "Ev Idx": 3895 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.6)", "pid": 2070552, "tid": 2107648, + "ts": 5333368698466.516, "dur": 35761.244, + "args": { + "External id": 297273,"Record function id": 0, "Ev Idx": 3896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368698558.120, "dur": 7.158, + "args": { + "External id": 297274,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368698574.811, "dur": 5.138, + "args": { + "External id": 297275,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368698594.313, "dur": 34848.459, + "args": { + "External id": 297276,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368698607.550, "dur": 34824.286, + "args": { + "External id": 297277,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 3900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368698688.618, "dur": 15.135, + "args": { + "External id": 297278,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368698709.935, "dur": 34681.579, + "args": { + "External id": 297279,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 3902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368698712.443, "dur": 34678.303, + "args": { + "External id": 297280,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 3903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368698716.241, "dur": 6.537, + "args": { + "External id": 297281,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368698724.516, "dur": 34661.655, + "args": { + "External id": 297282,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 3905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368733544.198, "dur": 13.130, + "args": { + "External id": 297283,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 3906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368733547.210, "dur": 9.760, + "args": { + "External id": 297284,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368733587.284, "dur": 330.732, + "args": { + "External id": 297285,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 3908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368733612.971, "dur": 299.980, + "args": { + "External id": 297286,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3909, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368733652.422, "dur": 254.371, + "args": { + "External id": 297287,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 3910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368733938.013, "dur": 2.344, + "args": { + "External id": 297288,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3911, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368734000.326, "dur": 6.790, + "args": { + "External id": 297289,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368734052.117, "dur": 1.259, + "args": { + "External id": 297290,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368734068.055, "dur": 1.444, + "args": { + "External id": 297291,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368734080.185, "dur": 1.229, + "args": { + "External id": 297292,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368734092.734, "dur": 1.161, + "args": { + "External id": 297293,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368734103.519, "dur": 0.933, + "args": { + "External id": 297294,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368734114.948, "dur": 1.137, + "args": { + "External id": 297295,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368734126.409, "dur": 1.221, + "args": { + "External id": 297296,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368734138.470, "dur": 1.177, + "args": { + "External id": 297297,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368734266.061, "dur": 2818.045, + "args": { + "External id": 297298,"Record function id": 0, "Ev Idx": 3921 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.5)", "pid": 2070552, "tid": 2107648, + "ts": 5333368734287.145, "dur": 1068.172, + "args": { + "External id": 297299,"Record function id": 0, "Ev Idx": 3922 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.5)", "pid": 2070552, "tid": 2107648, + "ts": 5333368734301.453, "dur": 357.705, + "args": { + "External id": 297300,"Record function id": 0, "Ev Idx": 3923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368734390.453, "dur": 4.827, + "args": { + "External id": 297301,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 3924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368734398.716, "dur": 0.982, + "args": { + "External id": 297302,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368734401.522, "dur": 0.976, + "args": { + "External id": 297303,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368734404.113, "dur": 1.472, + "args": { + "External id": 297304,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368734407.552, "dur": 1.121, + "args": { + "External id": 297305,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368734410.075, "dur": 1.452, + "args": { + "External id": 297306,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 3929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368734413.498, "dur": 1.062, + "args": { + "External id": 297307,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 3930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368734415.910, "dur": 0.928, + "args": { + "External id": 297308,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368734418.152, "dur": 0.948, + "args": { + "External id": 297309,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368734420.725, "dur": 0.933, + "args": { + "External id": 297310,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 3933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368734439.977, "dur": 151.110, + "args": { + "External id": 297311,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368734455.569, "dur": 131.183, + "args": { + "External id": 297312,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 3935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368734469.255, "dur": 13.670, + "args": { + "External id": 297313,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368734486.991, "dur": 70.328, + "args": { + "External id": 297314,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 3937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368734489.553, "dur": 67.481, + "args": { + "External id": 297315,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 3938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368734493.404, "dur": 7.153, + "args": { + "External id": 297316,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368734502.231, "dur": 54.091, + "args": { + "External id": 297317,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 3940 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.4", "pid": 2070552, "tid": 2107648, + "ts": 5333368734758.764, "dur": 589.507, + "args": { + "External id": 297318,"Record function id": 0, "Ev Idx": 3941 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.4)", "pid": 2070552, "tid": 2107648, + "ts": 5333368734776.075, "dur": 559.091, + "args": { + "External id": 297319,"Record function id": 0, "Ev Idx": 3942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368734841.956, "dur": 6.093, + "args": { + "External id": 297320,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368734864.309, "dur": 29.127, + "args": { + "External id": 297321,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368734869.242, "dur": 1.910, + "args": { + "External id": 297322,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368734873.026, "dur": 0.916, + "args": { + "External id": 297323,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368734875.452, "dur": 0.958, + "args": { + "External id": 297324,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368734877.963, "dur": 0.729, + "args": { + "External id": 297325,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368734880.025, "dur": 0.606, + "args": { + "External id": 297326,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368734882.294, "dur": 0.424, + "args": { + "External id": 297327,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368734884.010, "dur": 0.444, + "args": { + "External id": 297328,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368734885.697, "dur": 0.740, + "args": { + "External id": 297329,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368734887.541, "dur": 0.764, + "args": { + "External id": 297330,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368734903.578, "dur": 35.269, + "args": { + "External id": 297331,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5333368734970.954, "dur": 95.808, + "args": { + "External id": 297332,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 3955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368734980.742, "dur": 2.680, + "args": { + "External id": 297333,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5333368734988.495, "dur": 10.052, + "args": { + "External id": 297334,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5333368734992.873, "dur": 5.255, + "args": { + "External id": 297335,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 3958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368734996.267, "dur": 0.633, + "args": { + "External id": 297336,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 3959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368735005.339, "dur": 23.252, + "args": { + "External id": 297337,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 3960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368735007.017, "dur": 0.491, + "args": { + "External id": 297338,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368735009.031, "dur": 0.989, + "args": { + "External id": 297339,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368735011.511, "dur": 1.096, + "args": { + "External id": 297340,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368735013.711, "dur": 0.896, + "args": { + "External id": 297341,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368735015.917, "dur": 0.680, + "args": { + "External id": 297342,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368735018.004, "dur": 0.452, + "args": { + "External id": 297343,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368735019.965, "dur": 0.400, + "args": { + "External id": 297344,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368735022.032, "dur": 1.080, + "args": { + "External id": 297345,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368735024.542, "dur": 0.562, + "args": { + "External id": 297346,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 3969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368735038.591, "dur": 20.624, + "args": { + "External id": 297347,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 3970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368735109.050, "dur": 148.899, + "args": { + "External id": 297348,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 3971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368735132.653, "dur": 121.229, + "args": { + "External id": 297349,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 3972, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368735141.847, "dur": 106.362, + "args": { + "External id": 297350,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 3973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368735276.713, "dur": 1.918, + "args": { + "External id": 297351,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 3974, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368735363.324, "dur": 1698.758, + "args": { + "External id": 297352,"Sequence number": 1209172, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 3975 + } + }, + { + "ph": "f", "id": 60, "pid": 2070552, "tid": 2107648, "ts": 5333368735363.324, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368735478.018, "dur": 108.265, + "args": { + "External id": 297353,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 3976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368735666.349, "dur": 45.333, + "args": { + "External id": 297354,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 3977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5333368735731.659, "dur": 58.436, + "args": { + "External id": 297355,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 3978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368735801.065, "dur": 33.530, + "args": { + "External id": 297356,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368735841.358, "dur": 46.613, + "args": { + "External id": 297357,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368735894.658, "dur": 28.027, + "args": { + "External id": 297358,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 3981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368735929.989, "dur": 42.041, + "args": { + "External id": 297359,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 3982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368735994.238, "dur": 23.416, + "args": { + "External id": 297360,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 3983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368736035.980, "dur": 31.245, + "args": { + "External id": 297361,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368736087.612, "dur": 19.507, + "args": { + "External id": 297362,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 3985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368736119.293, "dur": 15.999, + "args": { + "External id": 297363,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 3986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368736147.210, "dur": 51.945, + "args": { + "External id": 297364,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368736205.626, "dur": 42.614, + "args": { + "External id": 297365,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5333368736280.484, "dur": 170.873, + "args": { + "External id": 297366,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 3989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368736354.958, "dur": 6.901, + "args": { + "External id": 297367,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368736363.792, "dur": 3.390, + "args": { + "External id": 297368,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 3991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368736488.614, "dur": 26.205, + "args": { + "External id": 297369,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368736525.877, "dur": 19.096, + "args": { + "External id": 297370,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 3993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368736553.046, "dur": 36.323, + "args": { + "External id": 297371,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368736595.359, "dur": 73.335, + "args": { + "External id": 297372,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368736680.083, "dur": 28.851, + "args": { + "External id": 297373,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368736714.096, "dur": 47.870, + "args": { + "External id": 297374,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368736775.574, "dur": 26.327, + "args": { + "External id": 297375,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 3998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368736810.481, "dur": 31.831, + "args": { + "External id": 297376,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 3999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5333368736865.487, "dur": 23.685, + "args": { + "External id": 297377,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 4000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368736908.611, "dur": 28.889, + "args": { + "External id": 297378,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368736953.389, "dur": 17.530, + "args": { + "External id": 297379,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368736988.409, "dur": 14.598, + "args": { + "External id": 297380,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5333368737014.548, "dur": 16.725, + "args": { + "External id": 297381,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 4004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368737107.108, "dur": 15.555, + "args": { + "External id": 297382,"Record function id": 0, "Ev Idx": 4005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368737110.401, "dur": 11.201, + "args": { + "External id": 297383,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368737114.504, "dur": 6.157, + "args": { + "External id": 297384,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368737116.032, "dur": 4.536, + "args": { + "External id": 297385,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368737126.515, "dur": 4.494, + "args": { + "External id": 297386,"Record function id": 0, "Ev Idx": 4009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368737127.894, "dur": 2.692, + "args": { + "External id": 297387,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368737128.620, "dur": 1.493, + "args": { + "External id": 297388,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368737128.956, "dur": 1.084, + "args": { + "External id": 297389,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368737134.260, "dur": 4.253, + "args": { + "External id": 297390,"Record function id": 0, "Ev Idx": 4013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368737135.546, "dur": 2.552, + "args": { + "External id": 297391,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368737136.224, "dur": 1.462, + "args": { + "External id": 297392,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368737136.551, "dur": 1.045, + "args": { + "External id": 297393,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368737141.661, "dur": 3.976, + "args": { + "External id": 297394,"Record function id": 0, "Ev Idx": 4017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368737142.898, "dur": 2.343, + "args": { + "External id": 297395,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368737143.550, "dur": 1.261, + "args": { + "External id": 297396,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368737143.832, "dur": 0.894, + "args": { + "External id": 297397,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368737148.655, "dur": 4.307, + "args": { + "External id": 297398,"Record function id": 0, "Ev Idx": 4021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368737150.103, "dur": 2.442, + "args": { + "External id": 297399,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368737150.716, "dur": 1.414, + "args": { + "External id": 297400,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368737151.119, "dur": 0.936, + "args": { + "External id": 297401,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368737156.038, "dur": 4.146, + "args": { + "External id": 297402,"Record function id": 0, "Ev Idx": 4025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368737157.154, "dur": 2.597, + "args": { + "External id": 297403,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368737157.823, "dur": 1.292, + "args": { + "External id": 297404,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368737158.120, "dur": 0.920, + "args": { + "External id": 297405,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368737163.284, "dur": 26.944, + "args": { + "External id": 297406,"Record function id": 0, "Ev Idx": 4029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368737164.595, "dur": 24.014, + "args": { + "External id": 297407,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368737182.998, "dur": 4.422, + "args": { + "External id": 297408,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368737184.576, "dur": 2.511, + "args": { + "External id": 297409,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368737197.379, "dur": 6.906, + "args": { + "External id": 297410,"Record function id": 0, "Ev Idx": 4033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368737199.452, "dur": 4.159, + "args": { + "External id": 297411,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368737200.789, "dur": 2.101, + "args": { + "External id": 297412,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368737201.455, "dur": 1.261, + "args": { + "External id": 297413,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368737208.752, "dur": 6.782, + "args": { + "External id": 297414,"Record function id": 0, "Ev Idx": 4037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368737211.063, "dur": 3.813, + "args": { + "External id": 297415,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368737212.132, "dur": 2.184, + "args": { + "External id": 297416,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368737212.974, "dur": 1.171, + "args": { + "External id": 297417,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368737221.474, "dur": 36853.869, + "args": { + "External id": 297418,"Record function id": 0, "Sequence number": 1209171, "Fwd thread id": 1, "Ev Idx": 4041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368737223.296, "dur": 36843.131, + "args": { + "External id": 297419,"Sequence number": 1209171, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4042 + } + }, + { + "ph": "f", "id": 61, "pid": 2070552, "tid": 2107648, "ts": 5333368737223.296, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.5)", "pid": 2070552, "tid": 2107648, + "ts": 5333368737258.893, "dur": 45.116, + "args": { + "External id": 297420,"Record function id": 0, "Ev Idx": 4043 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.5)", "pid": 2070552, "tid": 2107648, + "ts": 5333368737312.371, "dur": 72.996, + "args": { + "External id": 297421,"Record function id": 0, "Ev Idx": 4044 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.5)", "pid": 2070552, "tid": 2107648, + "ts": 5333368737391.432, "dur": 36667.362, + "args": { + "External id": 297422,"Record function id": 0, "Ev Idx": 4045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368737490.609, "dur": 7.582, + "args": { + "External id": 297423,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368737507.939, "dur": 5.276, + "args": { + "External id": 297424,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368737527.295, "dur": 35685.703, + "args": { + "External id": 297425,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368737545.866, "dur": 35654.821, + "args": { + "External id": 297426,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368737601.922, "dur": 15.755, + "args": { + "External id": 297427,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368737661.887, "dur": 35483.563, + "args": { + "External id": 297428,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 4051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368737664.905, "dur": 35479.714, + "args": { + "External id": 297429,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 4052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368737669.444, "dur": 7.225, + "args": { + "External id": 297430,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368737678.592, "dur": 35461.873, + "args": { + "External id": 297431,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 4054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368773317.952, "dur": 10.150, + "args": { + "External id": 297432,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 4055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368773320.942, "dur": 6.809, + "args": { + "External id": 297433,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368773358.931, "dur": 407.029, + "args": { + "External id": 297434,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 4057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368773385.014, "dur": 375.104, + "args": { + "External id": 297435,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4058, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368773396.173, "dur": 357.761, + "args": { + "External id": 297436,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 4059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368773788.298, "dur": 2.785, + "args": { + "External id": 297437,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4060, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368773855.376, "dur": 7.466, + "args": { + "External id": 297438,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368773907.626, "dur": 1.419, + "args": { + "External id": 297439,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368773924.298, "dur": 1.315, + "args": { + "External id": 297440,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368773938.418, "dur": 1.279, + "args": { + "External id": 297441,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368773951.768, "dur": 1.139, + "args": { + "External id": 297442,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368773962.365, "dur": 1.072, + "args": { + "External id": 297443,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368773975.106, "dur": 1.058, + "args": { + "External id": 297444,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368773986.732, "dur": 1.271, + "args": { + "External id": 297445,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368773997.088, "dur": 1.037, + "args": { + "External id": 297446,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368774090.477, "dur": 2862.692, + "args": { + "External id": 297447,"Record function id": 0, "Ev Idx": 4070 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.4)", "pid": 2070552, "tid": 2107648, + "ts": 5333368774111.364, "dur": 1107.593, + "args": { + "External id": 297448,"Record function id": 0, "Ev Idx": 4071 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.4)", "pid": 2070552, "tid": 2107648, + "ts": 5333368774126.575, "dur": 363.053, + "args": { + "External id": 297449,"Record function id": 0, "Ev Idx": 4072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368774247.443, "dur": 5.494, + "args": { + "External id": 297450,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368774256.591, "dur": 1.125, + "args": { + "External id": 297451,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368774259.442, "dur": 1.491, + "args": { + "External id": 297452,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368774262.297, "dur": 0.993, + "args": { + "External id": 297453,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368774265.229, "dur": 1.462, + "args": { + "External id": 297454,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368774268.271, "dur": 1.119, + "args": { + "External id": 297455,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368774270.769, "dur": 1.102, + "args": { + "External id": 297456,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368774273.437, "dur": 0.917, + "args": { + "External id": 297457,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368774275.971, "dur": 1.456, + "args": { + "External id": 297458,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368774279.135, "dur": 0.781, + "args": { + "External id": 297459,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368774300.899, "dur": 157.257, + "args": { + "External id": 297460,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368774316.911, "dur": 136.678, + "args": { + "External id": 297461,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368774331.831, "dur": 14.112, + "args": { + "External id": 297462,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368774350.023, "dur": 73.376, + "args": { + "External id": 297463,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 4086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368774352.537, "dur": 70.569, + "args": { + "External id": 297464,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 4087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368774356.280, "dur": 7.320, + "args": { + "External id": 297465,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368774365.211, "dur": 57.229, + "args": { + "External id": 297466,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 4089 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.3", "pid": 2070552, "tid": 2107648, + "ts": 5333368774584.782, "dur": 622.628, + "args": { + "External id": 297467,"Record function id": 0, "Ev Idx": 4090 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.3)", "pid": 2070552, "tid": 2107648, + "ts": 5333368774602.899, "dur": 582.316, + "args": { + "External id": 297468,"Record function id": 0, "Ev Idx": 4091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368774717.658, "dur": 6.909, + "args": { + "External id": 297469,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368774741.182, "dur": 36.972, + "args": { + "External id": 297470,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368774748.247, "dur": 1.798, + "args": { + "External id": 297471,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368774751.484, "dur": 0.602, + "args": { + "External id": 297472,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368774764.385, "dur": 0.551, + "args": { + "External id": 297473,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368774765.783, "dur": 0.862, + "args": { + "External id": 297474,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368774767.692, "dur": 0.713, + "args": { + "External id": 297475,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368774769.553, "dur": 0.648, + "args": { + "External id": 297476,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368774771.345, "dur": 0.304, + "args": { + "External id": 297477,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368774772.448, "dur": 0.626, + "args": { + "External id": 297478,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368774774.001, "dur": 0.227, + "args": { + "External id": 297479,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368774788.388, "dur": 36.970, + "args": { + "External id": 297480,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5333368774856.437, "dur": 89.336, + "args": { + "External id": 297481,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 4104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368774866.336, "dur": 3.097, + "args": { + "External id": 297482,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5333368774874.904, "dur": 10.071, + "args": { + "External id": 297483,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5333368774879.091, "dur": 5.473, + "args": { + "External id": 297484,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 4107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368774882.186, "dur": 1.159, + "args": { + "External id": 297485,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368774891.974, "dur": 16.823, + "args": { + "External id": 297486,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368774893.446, "dur": 0.647, + "args": { + "External id": 297487,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368774895.281, "dur": 0.467, + "args": { + "External id": 297488,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368774896.636, "dur": 0.598, + "args": { + "External id": 297489,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368774898.369, "dur": 0.323, + "args": { + "External id": 297490,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368774899.712, "dur": 0.398, + "args": { + "External id": 297491,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368774901.038, "dur": 0.778, + "args": { + "External id": 297492,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368774902.681, "dur": 0.622, + "args": { + "External id": 297493,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368774904.352, "dur": 0.455, + "args": { + "External id": 297494,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368774906.034, "dur": 0.278, + "args": { + "External id": 297495,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368774917.986, "dur": 19.632, + "args": { + "External id": 297496,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368774990.900, "dur": 111.966, + "args": { + "External id": 297497,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 4120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368775012.999, "dur": 86.521, + "args": { + "External id": 297498,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4121, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368775022.106, "dur": 72.955, + "args": { + "External id": 297499,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 4122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368775116.782, "dur": 1.569, + "args": { + "External id": 297500,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4123, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368775228.967, "dur": 1701.460, + "args": { + "External id": 297501,"Sequence number": 1209170, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4124 + } + }, + { + "ph": "f", "id": 62, "pid": 2070552, "tid": 2107648, "ts": 5333368775228.967, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368775353.497, "dur": 116.720, + "args": { + "External id": 297502,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 4125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368775508.135, "dur": 39.659, + "args": { + "External id": 297503,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 4126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5333368775563.817, "dur": 49.500, + "args": { + "External id": 297504,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 4127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368775664.064, "dur": 39.891, + "args": { + "External id": 297505,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368775711.499, "dur": 46.395, + "args": { + "External id": 297506,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368775764.857, "dur": 28.248, + "args": { + "External id": 297507,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368775800.973, "dur": 42.660, + "args": { + "External id": 297508,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368775866.056, "dur": 25.619, + "args": { + "External id": 297509,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 4132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368775908.718, "dur": 29.820, + "args": { + "External id": 297510,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368775958.386, "dur": 19.792, + "args": { + "External id": 297511,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368775990.278, "dur": 16.489, + "args": { + "External id": 297512,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368776014.910, "dur": 27.944, + "args": { + "External id": 297513,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368776045.958, "dur": 31.520, + "args": { + "External id": 297514,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5333368776105.001, "dur": 209.430, + "args": { + "External id": 297515,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368776209.386, "dur": 8.569, + "args": { + "External id": 297516,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368776220.482, "dur": 4.247, + "args": { + "External id": 297517,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368776349.408, "dur": 27.523, + "args": { + "External id": 297518,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368776387.622, "dur": 15.622, + "args": { + "External id": 297519,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368776411.900, "dur": 45.212, + "args": { + "External id": 297520,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368776463.176, "dur": 38.043, + "args": { + "External id": 297521,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368776507.515, "dur": 21.017, + "args": { + "External id": 297522,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368776533.105, "dur": 50.225, + "args": { + "External id": 297523,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368776596.172, "dur": 76.519, + "args": { + "External id": 297524,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368776683.546, "dur": 35.451, + "args": { + "External id": 297525,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5333368776742.044, "dur": 23.786, + "args": { + "External id": 297526,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 4149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368776783.519, "dur": 28.209, + "args": { + "External id": 297527,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368776825.627, "dur": 17.715, + "args": { + "External id": 297528,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368776857.298, "dur": 15.423, + "args": { + "External id": 297529,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5333368776883.376, "dur": 17.799, + "args": { + "External id": 297530,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 4153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368776976.702, "dur": 15.743, + "args": { + "External id": 297531,"Record function id": 0, "Ev Idx": 4154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368776980.226, "dur": 11.174, + "args": { + "External id": 297532,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368776984.530, "dur": 5.710, + "args": { + "External id": 297533,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368776985.741, "dur": 4.397, + "args": { + "External id": 297534,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368776996.316, "dur": 4.755, + "args": { + "External id": 297535,"Record function id": 0, "Ev Idx": 4158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368776997.725, "dur": 2.894, + "args": { + "External id": 297536,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368776998.582, "dur": 1.580, + "args": { + "External id": 297537,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368776998.934, "dur": 1.149, + "args": { + "External id": 297538,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368777004.239, "dur": 4.283, + "args": { + "External id": 297539,"Record function id": 0, "Ev Idx": 4162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368777005.577, "dur": 2.518, + "args": { + "External id": 297540,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368777006.396, "dur": 1.232, + "args": { + "External id": 297541,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368777006.935, "dur": 0.577, + "args": { + "External id": 297542,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368777011.660, "dur": 3.759, + "args": { + "External id": 297543,"Record function id": 0, "Ev Idx": 4166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368777012.765, "dur": 2.210, + "args": { + "External id": 297544,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368777013.566, "dur": 0.969, + "args": { + "External id": 297545,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368777013.855, "dur": 0.590, + "args": { + "External id": 297546,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368777018.463, "dur": 4.700, + "args": { + "External id": 297547,"Record function id": 0, "Ev Idx": 4170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368777019.609, "dur": 3.105, + "args": { + "External id": 297548,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368777020.345, "dur": 1.817, + "args": { + "External id": 297549,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368777020.810, "dur": 1.280, + "args": { + "External id": 297550,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368777026.214, "dur": 4.534, + "args": { + "External id": 297551,"Record function id": 0, "Ev Idx": 4174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368777027.499, "dur": 2.822, + "args": { + "External id": 297552,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368777028.218, "dur": 1.676, + "args": { + "External id": 297553,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368777028.634, "dur": 1.186, + "args": { + "External id": 297554,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368777033.905, "dur": 3.851, + "args": { + "External id": 297555,"Record function id": 0, "Ev Idx": 4178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368777035.128, "dur": 2.179, + "args": { + "External id": 297556,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368777035.854, "dur": 1.021, + "args": { + "External id": 297557,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368777036.161, "dur": 0.639, + "args": { + "External id": 297558,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368777040.723, "dur": 4.399, + "args": { + "External id": 297559,"Record function id": 0, "Ev Idx": 4182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368777042.012, "dur": 2.639, + "args": { + "External id": 297560,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368777042.726, "dur": 1.357, + "args": { + "External id": 297561,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368777043.061, "dur": 0.948, + "args": { + "External id": 297562,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368777048.117, "dur": 3.887, + "args": { + "External id": 297563,"Record function id": 0, "Ev Idx": 4186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368777049.189, "dur": 2.387, + "args": { + "External id": 297564,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368777049.866, "dur": 1.170, + "args": { + "External id": 297565,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368777050.183, "dur": 0.780, + "args": { + "External id": 297566,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368777055.750, "dur": 36434.855, + "args": { + "External id": 297567,"Record function id": 0, "Sequence number": 1209169, "Fwd thread id": 1, "Ev Idx": 4190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368777057.287, "dur": 36424.558, + "args": { + "External id": 297568,"Sequence number": 1209169, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4191 + } + }, + { + "ph": "f", "id": 63, "pid": 2070552, "tid": 2107648, "ts": 5333368777057.287, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.4)", "pid": 2070552, "tid": 2107648, + "ts": 5333368777089.131, "dur": 40.430, + "args": { + "External id": 297569,"Record function id": 0, "Ev Idx": 4192 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.4)", "pid": 2070552, "tid": 2107648, + "ts": 5333368777137.614, "dur": 110.828, + "args": { + "External id": 297570,"Record function id": 0, "Ev Idx": 4193 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.4)", "pid": 2070552, "tid": 2107648, + "ts": 5333368777259.246, "dur": 36214.723, + "args": { + "External id": 297571,"Record function id": 0, "Ev Idx": 4194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368777363.102, "dur": 7.947, + "args": { + "External id": 297572,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368777382.420, "dur": 5.729, + "args": { + "External id": 297573,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368777403.076, "dur": 35189.874, + "args": { + "External id": 297574,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368777420.413, "dur": 35162.726, + "args": { + "External id": 297575,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368777466.521, "dur": 18.110, + "args": { + "External id": 297576,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368777490.981, "dur": 35049.286, + "args": { + "External id": 297577,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 4200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368777493.432, "dur": 35046.073, + "args": { + "External id": 297578,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 4201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368777497.561, "dur": 5.702, + "args": { + "External id": 297579,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368777504.906, "dur": 35030.357, + "args": { + "External id": 297580,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 4203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368812714.772, "dur": 10.594, + "args": { + "External id": 297581,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 4204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368812717.730, "dur": 7.078, + "args": { + "External id": 297582,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368812755.006, "dur": 398.032, + "args": { + "External id": 297583,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 4206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368812778.998, "dur": 369.464, + "args": { + "External id": 297584,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4207, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368812789.514, "dur": 353.923, + "args": { + "External id": 297585,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 4208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368813188.450, "dur": 4.021, + "args": { + "External id": 297586,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4209, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368813272.628, "dur": 8.147, + "args": { + "External id": 297587,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368813327.674, "dur": 1.744, + "args": { + "External id": 297588,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368813344.471, "dur": 1.114, + "args": { + "External id": 297589,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368813356.120, "dur": 0.790, + "args": { + "External id": 297590,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368813367.368, "dur": 0.792, + "args": { + "External id": 297591,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368813377.213, "dur": 0.916, + "args": { + "External id": 297592,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368813387.920, "dur": 0.923, + "args": { + "External id": 297593,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368813398.894, "dur": 0.785, + "args": { + "External id": 297594,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368813411.035, "dur": 0.783, + "args": { + "External id": 297595,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368813507.980, "dur": 2801.589, + "args": { + "External id": 297596,"Record function id": 0, "Ev Idx": 4219 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.3)", "pid": 2070552, "tid": 2107648, + "ts": 5333368813528.572, "dur": 1038.617, + "args": { + "External id": 297597,"Record function id": 0, "Ev Idx": 4220 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.3)", "pid": 2070552, "tid": 2107648, + "ts": 5333368813542.604, "dur": 357.892, + "args": { + "External id": 297598,"Record function id": 0, "Ev Idx": 4221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368813670.960, "dur": 4.886, + "args": { + "External id": 297599,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368813679.740, "dur": 0.698, + "args": { + "External id": 297600,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368813682.208, "dur": 0.805, + "args": { + "External id": 297601,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368813684.437, "dur": 0.707, + "args": { + "External id": 297602,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368813686.374, "dur": 0.777, + "args": { + "External id": 297603,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368813688.421, "dur": 0.888, + "args": { + "External id": 297604,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368813690.537, "dur": 0.770, + "args": { + "External id": 297605,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368813692.801, "dur": 0.767, + "args": { + "External id": 297606,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368813694.818, "dur": 0.866, + "args": { + "External id": 297607,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368813697.066, "dur": 1.031, + "args": { + "External id": 297608,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368813718.399, "dur": 151.066, + "args": { + "External id": 297609,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368813734.884, "dur": 129.919, + "args": { + "External id": 297610,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368813750.141, "dur": 12.605, + "args": { + "External id": 297611,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368813766.775, "dur": 68.885, + "args": { + "External id": 297612,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 4235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368813769.177, "dur": 66.181, + "args": { + "External id": 297613,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 4236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368813772.535, "dur": 5.153, + "args": { + "External id": 297614,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368813779.259, "dur": 55.361, + "args": { + "External id": 297615,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 4238 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.2", "pid": 2070552, "tid": 2107648, + "ts": 5333368813993.867, "dur": 566.201, + "args": { + "External id": 297616,"Record function id": 0, "Ev Idx": 4239 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.2)", "pid": 2070552, "tid": 2107648, + "ts": 5333368814011.384, "dur": 536.567, + "args": { + "External id": 297617,"Record function id": 0, "Ev Idx": 4240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368814074.536, "dur": 4.587, + "args": { + "External id": 297618,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368814094.467, "dur": 19.743, + "args": { + "External id": 297619,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368814098.756, "dur": 1.458, + "args": { + "External id": 297620,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368814101.991, "dur": 0.682, + "args": { + "External id": 297621,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368814103.636, "dur": 0.508, + "args": { + "External id": 297622,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368814104.864, "dur": 0.319, + "args": { + "External id": 297623,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368814106.045, "dur": 0.329, + "args": { + "External id": 297624,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368814106.949, "dur": 0.370, + "args": { + "External id": 297625,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368814107.912, "dur": 0.554, + "args": { + "External id": 297626,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368814109.661, "dur": 0.442, + "args": { + "External id": 297627,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368814111.030, "dur": 0.319, + "args": { + "External id": 297628,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368814124.245, "dur": 33.378, + "args": { + "External id": 297629,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5333368814217.590, "dur": 106.331, + "args": { + "External id": 297630,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 4253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368814230.659, "dur": 6.505, + "args": { + "External id": 297631,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5333368814242.683, "dur": 10.015, + "args": { + "External id": 297632,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5333368814246.751, "dur": 5.511, + "args": { + "External id": 297633,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 4256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368814249.984, "dur": 0.675, + "args": { + "External id": 297634,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368814259.459, "dur": 21.783, + "args": { + "External id": 297635,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368814260.666, "dur": 0.501, + "args": { + "External id": 297636,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368814262.170, "dur": 0.238, + "args": { + "External id": 297637,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368814263.216, "dur": 0.193, + "args": { + "External id": 297638,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368814264.152, "dur": 0.258, + "args": { + "External id": 297639,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368814265.225, "dur": 0.227, + "args": { + "External id": 297640,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368814267.004, "dur": 0.393, + "args": { + "External id": 297641,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368814268.835, "dur": 0.581, + "args": { + "External id": 297642,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368814270.779, "dur": 0.472, + "args": { + "External id": 297643,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368814272.556, "dur": 0.610, + "args": { + "External id": 297644,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368814291.831, "dur": 24.341, + "args": { + "External id": 297645,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368814369.611, "dur": 114.217, + "args": { + "External id": 297646,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 4269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368814393.601, "dur": 86.525, + "args": { + "External id": 297647,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4270, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368814402.830, "dur": 73.150, + "args": { + "External id": 297648,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 4271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368814498.686, "dur": 1.865, + "args": { + "External id": 297649,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4272, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368814574.472, "dur": 1714.034, + "args": { + "External id": 297650,"Sequence number": 1209168, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4273 + } + }, + { + "ph": "f", "id": 64, "pid": 2070552, "tid": 2107648, "ts": 5333368814574.472, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368814727.722, "dur": 110.584, + "args": { + "External id": 297651,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 4274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368814875.054, "dur": 39.948, + "args": { + "External id": 297652,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 4275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5333368814931.346, "dur": 50.056, + "args": { + "External id": 297653,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 4276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368814991.088, "dur": 32.629, + "args": { + "External id": 297654,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368815030.617, "dur": 45.065, + "args": { + "External id": 297655,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368815082.236, "dur": 27.453, + "args": { + "External id": 297656,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368815117.095, "dur": 41.890, + "args": { + "External id": 297657,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368815203.567, "dur": 33.880, + "args": { + "External id": 297658,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 4281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368815259.462, "dur": 30.235, + "args": { + "External id": 297659,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368815313.015, "dur": 18.442, + "args": { + "External id": 297660,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368815344.103, "dur": 14.651, + "args": { + "External id": 297661,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368815367.506, "dur": 34.975, + "args": { + "External id": 297662,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368815405.585, "dur": 33.750, + "args": { + "External id": 297663,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5333368815468.898, "dur": 220.330, + "args": { + "External id": 297664,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368815549.875, "dur": 6.585, + "args": { + "External id": 297665,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368815558.265, "dur": 3.042, + "args": { + "External id": 297666,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368815725.487, "dur": 30.124, + "args": { + "External id": 297667,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368815767.178, "dur": 16.551, + "args": { + "External id": 297668,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368815793.490, "dur": 43.502, + "args": { + "External id": 297669,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368815843.888, "dur": 37.277, + "args": { + "External id": 297670,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368815887.803, "dur": 22.178, + "args": { + "External id": 297671,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368815914.795, "dur": 29.440, + "args": { + "External id": 297672,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368815950.222, "dur": 36.690, + "args": { + "External id": 297673,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368816002.670, "dur": 33.902, + "args": { + "External id": 297674,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5333368816057.112, "dur": 21.489, + "args": { + "External id": 297675,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 4298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368816097.172, "dur": 27.611, + "args": { + "External id": 297676,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368816139.667, "dur": 16.972, + "args": { + "External id": 297677,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368816194.120, "dur": 24.382, + "args": { + "External id": 297678,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5333368816236.285, "dur": 19.081, + "args": { + "External id": 297679,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 4302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368816332.512, "dur": 15.508, + "args": { + "External id": 297680,"Record function id": 0, "Ev Idx": 4303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368816336.002, "dur": 11.084, + "args": { + "External id": 297681,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368816340.268, "dur": 6.018, + "args": { + "External id": 297682,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368816341.491, "dur": 4.702, + "args": { + "External id": 297683,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368816351.848, "dur": 5.172, + "args": { + "External id": 297684,"Record function id": 0, "Ev Idx": 4307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368816353.309, "dur": 3.250, + "args": { + "External id": 297685,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368816354.033, "dur": 1.973, + "args": { + "External id": 297686,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368816354.394, "dur": 1.539, + "args": { + "External id": 297687,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368816360.205, "dur": 4.543, + "args": { + "External id": 297688,"Record function id": 0, "Ev Idx": 4311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368816361.899, "dur": 2.406, + "args": { + "External id": 297689,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368816362.411, "dur": 1.447, + "args": { + "External id": 297690,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368816362.890, "dur": 0.848, + "args": { + "External id": 297691,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368816367.914, "dur": 4.151, + "args": { + "External id": 297692,"Record function id": 0, "Ev Idx": 4315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368816369.054, "dur": 2.571, + "args": { + "External id": 297693,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368816369.804, "dur": 1.359, + "args": { + "External id": 297694,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368816370.303, "dur": 0.796, + "args": { + "External id": 297695,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368816375.061, "dur": 4.182, + "args": { + "External id": 297696,"Record function id": 0, "Ev Idx": 4319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368816376.236, "dur": 2.541, + "args": { + "External id": 297697,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368816376.944, "dur": 1.240, + "args": { + "External id": 297698,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368816377.222, "dur": 0.889, + "args": { + "External id": 297699,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368816382.300, "dur": 4.177, + "args": { + "External id": 297700,"Record function id": 0, "Ev Idx": 4323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368816383.614, "dur": 2.446, + "args": { + "External id": 297701,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368816384.270, "dur": 1.219, + "args": { + "External id": 297702,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368816384.749, "dur": 0.673, + "args": { + "External id": 297703,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368816389.724, "dur": 4.673, + "args": { + "External id": 297704,"Record function id": 0, "Ev Idx": 4327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368816390.952, "dur": 2.978, + "args": { + "External id": 297705,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368816391.593, "dur": 1.757, + "args": { + "External id": 297706,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368816391.914, "dur": 1.363, + "args": { + "External id": 297707,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368816397.418, "dur": 4.305, + "args": { + "External id": 297708,"Record function id": 0, "Ev Idx": 4331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368816398.665, "dur": 2.644, + "args": { + "External id": 297709,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368816399.421, "dur": 1.451, + "args": { + "External id": 297710,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368816399.745, "dur": 1.058, + "args": { + "External id": 297711,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368816404.933, "dur": 4.321, + "args": { + "External id": 297712,"Record function id": 0, "Ev Idx": 4335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368816405.982, "dur": 2.841, + "args": { + "External id": 297713,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368816406.723, "dur": 1.517, + "args": { + "External id": 297714,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368816407.026, "dur": 1.137, + "args": { + "External id": 297715,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368816413.216, "dur": 36565.849, + "args": { + "External id": 297716,"Record function id": 0, "Sequence number": 1209167, "Fwd thread id": 1, "Ev Idx": 4339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368816414.383, "dur": 36555.453, + "args": { + "External id": 297717,"Sequence number": 1209167, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4340 + } + }, + { + "ph": "f", "id": 65, "pid": 2070552, "tid": 2107648, "ts": 5333368816414.383, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.3)", "pid": 2070552, "tid": 2107648, + "ts": 5333368816444.820, "dur": 41.097, + "args": { + "External id": 297718,"Record function id": 0, "Ev Idx": 4341 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.3)", "pid": 2070552, "tid": 2107648, + "ts": 5333368816493.529, "dur": 72.364, + "args": { + "External id": 297719,"Record function id": 0, "Ev Idx": 4342 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.3)", "pid": 2070552, "tid": 2107648, + "ts": 5333368816572.131, "dur": 36390.136, + "args": { + "External id": 297720,"Record function id": 0, "Ev Idx": 4343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368816698.035, "dur": 8.461, + "args": { + "External id": 297721,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368816717.411, "dur": 5.397, + "args": { + "External id": 297722,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368816736.727, "dur": 35392.051, + "args": { + "External id": 297723,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368816750.683, "dur": 35367.776, + "args": { + "External id": 297724,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368816791.186, "dur": 14.452, + "args": { + "External id": 297725,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368816811.780, "dur": 35267.380, + "args": { + "External id": 297726,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 4349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368816814.188, "dur": 35264.065, + "args": { + "External id": 297727,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 4350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368816818.174, "dur": 5.314, + "args": { + "External id": 297728,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368816825.294, "dur": 35248.848, + "args": { + "External id": 297729,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 4352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368852239.826, "dur": 10.006, + "args": { + "External id": 297730,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 4353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368852243.023, "dur": 6.314, + "args": { + "External id": 297731,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368852278.772, "dur": 394.902, + "args": { + "External id": 297732,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 4355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368852304.275, "dur": 364.287, + "args": { + "External id": 297733,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4356, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368852314.657, "dur": 347.951, + "args": { + "External id": 297734,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 4357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368852694.776, "dur": 2.485, + "args": { + "External id": 297735,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4358, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368852760.816, "dur": 6.644, + "args": { + "External id": 297736,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368852810.644, "dur": 1.563, + "args": { + "External id": 297737,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368852826.631, "dur": 1.386, + "args": { + "External id": 297738,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368852839.095, "dur": 1.315, + "args": { + "External id": 297739,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368852850.836, "dur": 1.298, + "args": { + "External id": 297740,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368852861.986, "dur": 1.617, + "args": { + "External id": 297741,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368852874.638, "dur": 1.454, + "args": { + "External id": 297742,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368852887.270, "dur": 1.349, + "args": { + "External id": 297743,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368852897.988, "dur": 1.293, + "args": { + "External id": 297744,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368852995.255, "dur": 2887.564, + "args": { + "External id": 297745,"Record function id": 0, "Ev Idx": 4368 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.2)", "pid": 2070552, "tid": 2107648, + "ts": 5333368853016.300, "dur": 1073.136, + "args": { + "External id": 297746,"Record function id": 0, "Ev Idx": 4369 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.2)", "pid": 2070552, "tid": 2107648, + "ts": 5333368853030.093, "dur": 365.679, + "args": { + "External id": 297747,"Record function id": 0, "Ev Idx": 4370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368853114.541, "dur": 4.249, + "args": { + "External id": 297748,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368853122.130, "dur": 1.909, + "args": { + "External id": 297749,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368853126.070, "dur": 1.511, + "args": { + "External id": 297750,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368853129.218, "dur": 1.394, + "args": { + "External id": 297751,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368853132.080, "dur": 1.331, + "args": { + "External id": 297752,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368853134.685, "dur": 1.604, + "args": { + "External id": 297753,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368853138.118, "dur": 1.357, + "args": { + "External id": 297754,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368853141.239, "dur": 1.233, + "args": { + "External id": 297755,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368853143.921, "dur": 1.642, + "args": { + "External id": 297756,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368853146.993, "dur": 1.540, + "args": { + "External id": 297757,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368853184.565, "dur": 176.559, + "args": { + "External id": 297758,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368853206.819, "dur": 149.099, + "args": { + "External id": 297759,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368853222.916, "dur": 15.524, + "args": { + "External id": 297760,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368853243.087, "dur": 82.629, + "args": { + "External id": 297761,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 4384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368853245.809, "dur": 79.552, + "args": { + "External id": 297762,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 4385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368853250.145, "dur": 7.906, + "args": { + "External id": 297763,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368853260.370, "dur": 64.166, + "args": { + "External id": 297764,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 4387 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.1", "pid": 2070552, "tid": 2107648, + "ts": 5333368853490.858, "dur": 591.631, + "args": { + "External id": 297765,"Record function id": 0, "Ev Idx": 4388 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.1)", "pid": 2070552, "tid": 2107648, + "ts": 5333368853509.071, "dur": 561.459, + "args": { + "External id": 297766,"Record function id": 0, "Ev Idx": 4389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368853575.802, "dur": 5.112, + "args": { + "External id": 297767,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368853596.168, "dur": 67.399, + "args": { + "External id": 297768,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368853600.893, "dur": 1.868, + "args": { + "External id": 297769,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368853604.301, "dur": 0.757, + "args": { + "External id": 297770,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368853605.897, "dur": 0.769, + "args": { + "External id": 297771,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368853607.470, "dur": 0.660, + "args": { + "External id": 297772,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368853609.762, "dur": 0.777, + "args": { + "External id": 297773,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368853611.779, "dur": 0.841, + "args": { + "External id": 297774,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368853614.081, "dur": 0.410, + "args": { + "External id": 297775,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368853615.855, "dur": 0.642, + "args": { + "External id": 297776,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368853617.545, "dur": 0.819, + "args": { + "External id": 297777,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368853678.488, "dur": 35.471, + "args": { + "External id": 297778,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5333368853747.145, "dur": 99.599, + "args": { + "External id": 297779,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 4402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368853758.106, "dur": 4.665, + "args": { + "External id": 297780,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5333368853767.816, "dur": 10.624, + "args": { + "External id": 297781,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5333368853771.987, "dur": 6.013, + "args": { + "External id": 297782,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 4405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368853775.254, "dur": 0.846, + "args": { + "External id": 297783,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368853785.559, "dur": 24.795, + "args": { + "External id": 297784,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368853787.210, "dur": 0.744, + "args": { + "External id": 297785,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368853789.116, "dur": 0.460, + "args": { + "External id": 297786,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368853790.729, "dur": 0.757, + "args": { + "External id": 297787,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368853792.782, "dur": 0.750, + "args": { + "External id": 297788,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368853794.333, "dur": 0.586, + "args": { + "External id": 297789,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368853796.208, "dur": 0.436, + "args": { + "External id": 297790,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368853799.542, "dur": 0.603, + "args": { + "External id": 297791,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368853800.757, "dur": 0.643, + "args": { + "External id": 297792,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368853802.935, "dur": 0.562, + "args": { + "External id": 297793,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368853820.591, "dur": 18.524, + "args": { + "External id": 297794,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368853891.645, "dur": 115.204, + "args": { + "External id": 297795,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 4418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368853915.891, "dur": 87.776, + "args": { + "External id": 297796,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4419, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368853925.361, "dur": 73.034, + "args": { + "External id": 297797,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 4420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368854020.204, "dur": 1.643, + "args": { + "External id": 297798,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4421, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368854097.582, "dur": 1763.227, + "args": { + "External id": 297799,"Sequence number": 1209166, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4422 + } + }, + { + "ph": "f", "id": 66, "pid": 2070552, "tid": 2107648, "ts": 5333368854097.582, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368854241.955, "dur": 113.397, + "args": { + "External id": 297800,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 4423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368854394.495, "dur": 41.945, + "args": { + "External id": 297801,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 4424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5333368854453.889, "dur": 49.342, + "args": { + "External id": 297802,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 4425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368854513.242, "dur": 32.976, + "args": { + "External id": 297803,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368854552.782, "dur": 44.226, + "args": { + "External id": 297804,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368854604.141, "dur": 85.789, + "args": { + "External id": 297805,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368854707.885, "dur": 54.414, + "args": { + "External id": 297806,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368854787.005, "dur": 26.037, + "args": { + "External id": 297807,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 4430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368854830.778, "dur": 28.657, + "args": { + "External id": 297808,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368854880.634, "dur": 18.688, + "args": { + "External id": 297809,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368854912.821, "dur": 16.196, + "args": { + "External id": 297810,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368854937.747, "dur": 28.403, + "args": { + "External id": 297811,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368854969.121, "dur": 31.728, + "args": { + "External id": 297812,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5333368855029.645, "dur": 197.953, + "args": { + "External id": 297813,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368855106.580, "dur": 6.402, + "args": { + "External id": 297814,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368855114.853, "dur": 3.375, + "args": { + "External id": 297815,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368855273.903, "dur": 30.560, + "args": { + "External id": 297816,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368855316.744, "dur": 16.583, + "args": { + "External id": 297817,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368855343.410, "dur": 46.607, + "args": { + "External id": 297818,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368855397.211, "dur": 38.685, + "args": { + "External id": 297819,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368855442.808, "dur": 38.659, + "args": { + "External id": 297820,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368855493.141, "dur": 35.375, + "args": { + "External id": 297821,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368855536.062, "dur": 20.584, + "args": { + "External id": 297822,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368855564.109, "dur": 33.538, + "args": { + "External id": 297823,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5333368855616.182, "dur": 65.324, + "args": { + "External id": 297824,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 4447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368855706.599, "dur": 25.733, + "args": { + "External id": 297825,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368855749.002, "dur": 20.581, + "args": { + "External id": 297826,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368855784.548, "dur": 14.548, + "args": { + "External id": 297827,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5333368855811.806, "dur": 18.288, + "args": { + "External id": 297828,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 4451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368855906.902, "dur": 15.644, + "args": { + "External id": 297829,"Record function id": 0, "Ev Idx": 4452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368855909.950, "dur": 11.577, + "args": { + "External id": 297830,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368855914.097, "dur": 6.518, + "args": { + "External id": 297831,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368855915.449, "dur": 5.080, + "args": { + "External id": 297832,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368855926.334, "dur": 5.254, + "args": { + "External id": 297833,"Record function id": 0, "Ev Idx": 4456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368855927.690, "dur": 3.445, + "args": { + "External id": 297834,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368855928.869, "dur": 1.664, + "args": { + "External id": 297835,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368855929.447, "dur": 1.014, + "args": { + "External id": 297836,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368855934.849, "dur": 4.822, + "args": { + "External id": 297837,"Record function id": 0, "Ev Idx": 4460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368855936.361, "dur": 2.907, + "args": { + "External id": 297838,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368855937.169, "dur": 1.610, + "args": { + "External id": 297839,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368855937.816, "dur": 0.875, + "args": { + "External id": 297840,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368855942.966, "dur": 7.339, + "args": { + "External id": 297841,"Record function id": 0, "Ev Idx": 4464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368855944.073, "dur": 2.728, + "args": { + "External id": 297842,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368855944.621, "dur": 1.718, + "args": { + "External id": 297843,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368855945.141, "dur": 1.113, + "args": { + "External id": 297844,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368855953.388, "dur": 4.033, + "args": { + "External id": 297845,"Record function id": 0, "Ev Idx": 4468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368855954.529, "dur": 2.441, + "args": { + "External id": 297846,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368855954.980, "dur": 1.338, + "args": { + "External id": 297847,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368855955.410, "dur": 0.832, + "args": { + "External id": 297848,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368855960.499, "dur": 3.950, + "args": { + "External id": 297849,"Record function id": 0, "Ev Idx": 4472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368855961.543, "dur": 2.496, + "args": { + "External id": 297850,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368855962.042, "dur": 1.446, + "args": { + "External id": 297851,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368855962.366, "dur": 1.047, + "args": { + "External id": 297852,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368855967.817, "dur": 3.642, + "args": { + "External id": 297853,"Record function id": 0, "Ev Idx": 4476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368855968.871, "dur": 2.152, + "args": { + "External id": 297854,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368855969.346, "dur": 1.111, + "args": { + "External id": 297855,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368855969.632, "dur": 0.749, + "args": { + "External id": 297856,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368855974.498, "dur": 3.667, + "args": { + "External id": 297857,"Record function id": 0, "Ev Idx": 4480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368855975.537, "dur": 2.201, + "args": { + "External id": 297858,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368855976.018, "dur": 1.176, + "args": { + "External id": 297859,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368855976.306, "dur": 0.813, + "args": { + "External id": 297860,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368855981.121, "dur": 3.879, + "args": { + "External id": 297861,"Record function id": 0, "Ev Idx": 4484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368855982.083, "dur": 2.513, + "args": { + "External id": 297862,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368855982.586, "dur": 1.433, + "args": { + "External id": 297863,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368855982.891, "dur": 1.057, + "args": { + "External id": 297864,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368855988.645, "dur": 36613.126, + "args": { + "External id": 297865,"Record function id": 0, "Sequence number": 1209165, "Fwd thread id": 1, "Ev Idx": 4488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368855989.845, "dur": 36603.264, + "args": { + "External id": 297866,"Sequence number": 1209165, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4489 + } + }, + { + "ph": "f", "id": 67, "pid": 2070552, "tid": 2107648, "ts": 5333368855989.845, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.2)", "pid": 2070552, "tid": 2107648, + "ts": 5333368856022.165, "dur": 41.575, + "args": { + "External id": 297867,"Record function id": 0, "Ev Idx": 4490 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.2)", "pid": 2070552, "tid": 2107648, + "ts": 5333368856071.703, "dur": 72.409, + "args": { + "External id": 297868,"Record function id": 0, "Ev Idx": 4491 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.2)", "pid": 2070552, "tid": 2107648, + "ts": 5333368856150.120, "dur": 36435.482, + "args": { + "External id": 297869,"Record function id": 0, "Ev Idx": 4492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368856259.027, "dur": 6.889, + "args": { + "External id": 297870,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368856276.480, "dur": 5.145, + "args": { + "External id": 297871,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368856295.750, "dur": 35405.268, + "args": { + "External id": 297872,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368856308.766, "dur": 35381.349, + "args": { + "External id": 297873,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368856345.843, "dur": 13.942, + "args": { + "External id": 297874,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368856365.877, "dur": 35283.206, + "args": { + "External id": 297875,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 4498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368856368.523, "dur": 35279.690, + "args": { + "External id": 297876,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 4499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368856372.372, "dur": 9.820, + "args": { + "External id": 297877,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368856384.005, "dur": 35259.725, + "args": { + "External id": 297878,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 4501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368891801.639, "dur": 10.923, + "args": { + "External id": 297879,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 4502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368891804.602, "dur": 7.596, + "args": { + "External id": 297880,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368891841.625, "dur": 445.014, + "args": { + "External id": 297881,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 4504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368891867.569, "dur": 413.234, + "args": { + "External id": 297882,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4505, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368891877.955, "dur": 396.433, + "args": { + "External id": 297883,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 4506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368892309.544, "dur": 1.932, + "args": { + "External id": 297884,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4507, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368892380.519, "dur": 6.896, + "args": { + "External id": 297885,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368892431.347, "dur": 1.720, + "args": { + "External id": 297886,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368892448.644, "dur": 1.647, + "args": { + "External id": 297887,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368892463.439, "dur": 1.351, + "args": { + "External id": 297888,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368892475.227, "dur": 1.552, + "args": { + "External id": 297889,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368892486.944, "dur": 1.803, + "args": { + "External id": 297890,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368892499.216, "dur": 1.576, + "args": { + "External id": 297891,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368892511.901, "dur": 1.093, + "args": { + "External id": 297892,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368892522.250, "dur": 1.332, + "args": { + "External id": 297893,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368892617.039, "dur": 2893.813, + "args": { + "External id": 297894,"Record function id": 0, "Ev Idx": 4517 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.1)", "pid": 2070552, "tid": 2107648, + "ts": 5333368892677.071, "dur": 1089.669, + "args": { + "External id": 297895,"Record function id": 0, "Ev Idx": 4518 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.1)", "pid": 2070552, "tid": 2107648, + "ts": 5333368892691.680, "dur": 325.494, + "args": { + "External id": 297896,"Record function id": 0, "Ev Idx": 4519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368892784.945, "dur": 4.856, + "args": { + "External id": 297897,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368892793.358, "dur": 2.018, + "args": { + "External id": 297898,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368892796.952, "dur": 1.302, + "args": { + "External id": 297899,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368892799.916, "dur": 1.560, + "args": { + "External id": 297900,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368892802.945, "dur": 1.440, + "args": { + "External id": 297901,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368892805.866, "dur": 1.263, + "args": { + "External id": 297902,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368892808.739, "dur": 1.122, + "args": { + "External id": 297903,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368892811.107, "dur": 1.885, + "args": { + "External id": 297904,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368892814.597, "dur": 1.441, + "args": { + "External id": 297905,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368892817.536, "dur": 1.734, + "args": { + "External id": 297906,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368892837.205, "dur": 150.373, + "args": { + "External id": 297907,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368892853.398, "dur": 129.896, + "args": { + "External id": 297908,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368892867.198, "dur": 12.855, + "args": { + "External id": 297909,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368892883.498, "dur": 70.267, + "args": { + "External id": 297910,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 4533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368892886.161, "dur": 67.231, + "args": { + "External id": 297911,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 4534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368892889.695, "dur": 7.010, + "args": { + "External id": 297912,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368892898.442, "dur": 54.353, + "args": { + "External id": 297913,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 4536 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::backward_prefetch for model.layers.0", "pid": 2070552, "tid": 2107648, + "ts": 5333368893111.281, "dur": 647.503, + "args": { + "External id": 297914,"Record function id": 0, "Ev Idx": 4537 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.0)", "pid": 2070552, "tid": 2107648, + "ts": 5333368893129.233, "dur": 616.484, + "args": { + "External id": 297915,"Record function id": 0, "Ev Idx": 4538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368893219.853, "dur": 8.253, + "args": { + "External id": 297916,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368893247.344, "dur": 35.923, + "args": { + "External id": 297917,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368893252.814, "dur": 2.000, + "args": { + "External id": 297918,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368893257.214, "dur": 0.534, + "args": { + "External id": 297919,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368893259.576, "dur": 0.889, + "args": { + "External id": 297920,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368893262.938, "dur": 0.860, + "args": { + "External id": 297921,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368893265.955, "dur": 1.180, + "args": { + "External id": 297922,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368893268.768, "dur": 1.096, + "args": { + "External id": 297923,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368893271.060, "dur": 0.896, + "args": { + "External id": 297924,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368893274.230, "dur": 0.734, + "args": { + "External id": 297925,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368893276.730, "dur": 0.968, + "args": { + "External id": 297926,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368893296.237, "dur": 39.423, + "args": { + "External id": 297927,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2107648, + "ts": 5333368893369.158, "dur": 103.568, + "args": { + "External id": 297928,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 4551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368893379.234, "dur": 3.966, + "args": { + "External id": 297929,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2107648, + "ts": 5333368893388.508, "dur": 10.883, + "args": { + "External id": 297930,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2107648, + "ts": 5333368893392.863, "dur": 6.126, + "args": { + "External id": 297931,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 4554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368893396.301, "dur": 0.970, + "args": { + "External id": 297932,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2107648, + "ts": 5333368893405.715, "dur": 26.956, + "args": { + "External id": 297933,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368893408.154, "dur": 0.708, + "args": { + "External id": 297934,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368893410.828, "dur": 0.824, + "args": { + "External id": 297935,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368893412.974, "dur": 0.681, + "args": { + "External id": 297936,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368893414.894, "dur": 0.394, + "args": { + "External id": 297937,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368893416.448, "dur": 0.691, + "args": { + "External id": 297938,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368893418.084, "dur": 0.688, + "args": { + "External id": 297939,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368893420.371, "dur": 0.545, + "args": { + "External id": 297940,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368893422.314, "dur": 0.801, + "args": { + "External id": 297941,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368893424.054, "dur": 0.745, + "args": { + "External id": 297942,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368893446.080, "dur": 18.821, + "args": { + "External id": 297943,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368893519.936, "dur": 151.490, + "args": { + "External id": 297944,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 4567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368893542.135, "dur": 125.502, + "args": { + "External id": 297945,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4568, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368893551.194, "dur": 110.504, + "args": { + "External id": 297946,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 4569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368893687.829, "dur": 2.146, + "args": { + "External id": 297947,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4570, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368893775.156, "dur": 1713.783, + "args": { + "External id": 297948,"Sequence number": 1209164, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4571 + } + }, + { + "ph": "f", "id": 68, "pid": 2070552, "tid": 2107648, "ts": 5333368893775.156, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368893890.737, "dur": 109.178, + "args": { + "External id": 297949,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 4572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368894035.174, "dur": 41.167, + "args": { + "External id": 297950,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 4573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5333368894093.283, "dur": 50.798, + "args": { + "External id": 297951,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 4574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368894153.580, "dur": 61.540, + "args": { + "External id": 297952,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368894228.369, "dur": 55.921, + "args": { + "External id": 297953,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368894292.664, "dur": 27.832, + "args": { + "External id": 297954,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368894327.709, "dur": 41.898, + "args": { + "External id": 297955,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368894394.341, "dur": 25.967, + "args": { + "External id": 297956,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 4579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368894438.420, "dur": 30.742, + "args": { + "External id": 297957,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368894490.822, "dur": 25.850, + "args": { + "External id": 297958,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368894528.922, "dur": 16.575, + "args": { + "External id": 297959,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368894554.065, "dur": 28.953, + "args": { + "External id": 297960,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368894586.023, "dur": 31.779, + "args": { + "External id": 297961,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5333368894687.995, "dur": 179.371, + "args": { + "External id": 297962,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368894769.332, "dur": 6.115, + "args": { + "External id": 297963,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368894777.531, "dur": 3.340, + "args": { + "External id": 297964,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368894903.398, "dur": 25.526, + "args": { + "External id": 297965,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368894940.631, "dur": 15.977, + "args": { + "External id": 297966,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368894964.322, "dur": 41.148, + "args": { + "External id": 297967,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368895011.864, "dur": 54.829, + "args": { + "External id": 297968,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368895080.172, "dur": 24.471, + "args": { + "External id": 297969,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368895113.168, "dur": 32.270, + "args": { + "External id": 297970,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368895151.354, "dur": 44.253, + "args": { + "External id": 297971,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368895208.579, "dur": 46.272, + "args": { + "External id": 297972,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5333368895280.930, "dur": 29.484, + "args": { + "External id": 297973,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 4596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368895330.072, "dur": 27.619, + "args": { + "External id": 297974,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368895373.060, "dur": 18.594, + "args": { + "External id": 297975,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368895409.231, "dur": 15.216, + "args": { + "External id": 297976,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5333368895437.000, "dur": 18.098, + "args": { + "External id": 297977,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 4600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368895536.375, "dur": 14.926, + "args": { + "External id": 297978,"Record function id": 0, "Ev Idx": 4601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368895539.588, "dur": 10.696, + "args": { + "External id": 297979,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368895543.522, "dur": 5.875, + "args": { + "External id": 297980,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368895544.823, "dur": 4.483, + "args": { + "External id": 297981,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368895555.076, "dur": 17.928, + "args": { + "External id": 297982,"Record function id": 0, "Ev Idx": 4605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368895556.620, "dur": 15.848, + "args": { + "External id": 297983,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368895557.613, "dur": 14.293, + "args": { + "External id": 297984,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368895570.773, "dur": 1.026, + "args": { + "External id": 297985,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368895576.297, "dur": 4.047, + "args": { + "External id": 297986,"Record function id": 0, "Ev Idx": 4609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368895577.318, "dur": 2.597, + "args": { + "External id": 297987,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368895577.952, "dur": 1.378, + "args": { + "External id": 297988,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368895578.282, "dur": 0.962, + "args": { + "External id": 297989,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368895583.444, "dur": 4.970, + "args": { + "External id": 297990,"Record function id": 0, "Ev Idx": 4613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368895584.658, "dur": 3.362, + "args": { + "External id": 297991,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368895585.637, "dur": 1.653, + "args": { + "External id": 297992,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368895586.158, "dur": 1.049, + "args": { + "External id": 297993,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368895591.517, "dur": 4.070, + "args": { + "External id": 297994,"Record function id": 0, "Ev Idx": 4617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368895592.665, "dur": 2.507, + "args": { + "External id": 297995,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368895593.206, "dur": 1.201, + "args": { + "External id": 297996,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368895593.559, "dur": 0.772, + "args": { + "External id": 297997,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368895598.654, "dur": 3.978, + "args": { + "External id": 297998,"Record function id": 0, "Ev Idx": 4621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368895599.558, "dur": 2.637, + "args": { + "External id": 297999,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368895600.141, "dur": 1.500, + "args": { + "External id": 298000,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368895600.690, "dur": 0.874, + "args": { + "External id": 298001,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368895605.805, "dur": 4.018, + "args": { + "External id": 298002,"Record function id": 0, "Ev Idx": 4625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368895607.004, "dur": 2.373, + "args": { + "External id": 298003,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368895607.597, "dur": 1.121, + "args": { + "External id": 298004,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368895607.938, "dur": 0.705, + "args": { + "External id": 298005,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368895612.854, "dur": 4.096, + "args": { + "External id": 298006,"Record function id": 0, "Ev Idx": 4629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368895613.896, "dur": 2.632, + "args": { + "External id": 298007,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368895614.781, "dur": 1.324, + "args": { + "External id": 298008,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368895615.360, "dur": 0.669, + "args": { + "External id": 298009,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368895619.986, "dur": 41.834, + "args": { + "External id": 298010,"Record function id": 0, "Ev Idx": 4633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368895620.962, "dur": 39.525, + "args": { + "External id": 298011,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368895621.461, "dur": 37.611, + "args": { + "External id": 298012,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368895621.785, "dur": 36.684, + "args": { + "External id": 298013,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368895668.280, "dur": 37398.165, + "args": { + "External id": 298014,"Record function id": 0, "Sequence number": 1209163, "Fwd thread id": 1, "Ev Idx": 4637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368895669.885, "dur": 37387.461, + "args": { + "External id": 298015,"Sequence number": 1209163, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4638 + } + }, + { + "ph": "f", "id": 69, "pid": 2070552, "tid": 2107648, "ts": 5333368895669.885, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.1)", "pid": 2070552, "tid": 2107648, + "ts": 5333368895700.833, "dur": 42.173, + "args": { + "External id": 298016,"Record function id": 0, "Ev Idx": 4639 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.1)", "pid": 2070552, "tid": 2107648, + "ts": 5333368895750.877, "dur": 74.159, + "args": { + "External id": 298017,"Record function id": 0, "Ev Idx": 4640 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.1)", "pid": 2070552, "tid": 2107648, + "ts": 5333368895831.044, "dur": 37218.632, + "args": { + "External id": 298018,"Record function id": 0, "Ev Idx": 4641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368895925.341, "dur": 7.121, + "args": { + "External id": 298019,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368895942.344, "dur": 4.784, + "args": { + "External id": 298020,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368895960.711, "dur": 36203.979, + "args": { + "External id": 298021,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368895973.910, "dur": 36180.080, + "args": { + "External id": 298022,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368896022.250, "dur": 14.301, + "args": { + "External id": 298023,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368896043.015, "dur": 36070.801, + "args": { + "External id": 298024,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 4647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368896045.368, "dur": 36067.355, + "args": { + "External id": 298025,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 4648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368896048.984, "dur": 5.084, + "args": { + "External id": 298026,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368896055.674, "dur": 36052.843, + "args": { + "External id": 298027,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 4650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368932287.744, "dur": 13.272, + "args": { + "External id": 298028,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 4651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368932290.841, "dur": 9.667, + "args": { + "External id": 298029,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368932330.373, "dur": 409.730, + "args": { + "External id": 298030,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 4653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368932355.274, "dur": 379.480, + "args": { + "External id": 298031,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4654, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368932366.757, "dur": 361.440, + "args": { + "External id": 298032,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 4655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368932762.206, "dur": 2.496, + "args": { + "External id": 298033,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4656, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368932831.334, "dur": 7.063, + "args": { + "External id": 298034,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368932883.918, "dur": 1.703, + "args": { + "External id": 298035,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368932901.063, "dur": 1.926, + "args": { + "External id": 298036,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368932917.432, "dur": 1.382, + "args": { + "External id": 298037,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368932931.183, "dur": 1.233, + "args": { + "External id": 298038,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368932943.074, "dur": 1.247, + "args": { + "External id": 298039,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368932956.154, "dur": 1.751, + "args": { + "External id": 298040,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368932968.933, "dur": 1.818, + "args": { + "External id": 298041,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368932983.078, "dur": 1.502, + "args": { + "External id": 298042,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368933082.324, "dur": 2259.294, + "args": { + "External id": 298043,"Record function id": 0, "Ev Idx": 4666 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_backward (model.layers.0)", "pid": 2070552, "tid": 2107648, + "ts": 5333368933103.861, "dur": 510.447, + "args": { + "External id": 298044,"Record function id": 0, "Ev Idx": 4667 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.0)", "pid": 2070552, "tid": 2107648, + "ts": 5333368933119.131, "dur": 388.651, + "args": { + "External id": 298045,"Record function id": 0, "Ev Idx": 4668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368933241.088, "dur": 6.265, + "args": { + "External id": 298046,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368933252.233, "dur": 2.201, + "args": { + "External id": 298047,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368933257.322, "dur": 1.832, + "args": { + "External id": 298048,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368933266.412, "dur": 2.373, + "args": { + "External id": 298049,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368933271.094, "dur": 1.664, + "args": { + "External id": 298050,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368933275.098, "dur": 2.144, + "args": { + "External id": 298051,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368933279.746, "dur": 1.627, + "args": { + "External id": 298052,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368933283.475, "dur": 2.156, + "args": { + "External id": 298053,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368933288.219, "dur": 1.863, + "args": { + "External id": 298054,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368933292.371, "dur": 1.828, + "args": { + "External id": 298055,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368933317.557, "dur": 155.932, + "args": { + "External id": 298056,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368933333.207, "dur": 135.538, + "args": { + "External id": 298057,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368933348.029, "dur": 13.611, + "args": { + "External id": 298058,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368933365.663, "dur": 72.644, + "args": { + "External id": 298059,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 4682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368933368.441, "dur": 69.380, + "args": { + "External id": 298060,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 4683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368933372.333, "dur": 6.541, + "args": { + "External id": 298061,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368933380.471, "dur": 56.868, + "args": { + "External id": 298062,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 4685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368933661.892, "dur": 1655.730, + "args": { + "External id": 298063,"Sequence number": 1209162, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4686 + } + }, + { + "ph": "f", "id": 70, "pid": 2070552, "tid": 2107648, "ts": 5333368933661.892, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368933776.542, "dur": 109.332, + "args": { + "External id": 298064,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [5632, 1], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 4687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_add_mul_rsub_silu_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368933925.623, "dur": 39.256, + "args": { + "External id": 298065,"kernel_hash": "cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/g5/cg54rk6plkxazmxvreoge3ocdgo2qvrqueiwgyufr7vfeqr2c74q.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [65536, 5632], [16, 4096, 5632], [16, 4096, 5632], [16, 4096, 5632], []], "Ev Idx": 4688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::bmm", "pid": 2070552, "tid": 2107648, + "ts": 5333368933981.390, "dur": 49.578, + "args": { + "External id": 298066,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[0, 1, 2048], [0, 5632, 1], [11534336, 5632, 1]], "Input Dims": [[1, 2048, 65536], [1, 65536, 5632], [1, 2048, 5632]], "Ev Idx": 4689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368934040.646, "dur": 30.796, + "args": { + "External id": 298067,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368934077.602, "dur": 45.366, + "args": { + "External id": 298068,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368934128.928, "dur": 26.997, + "args": { + "External id": 298069,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 5632], [2048, 1], [2048, 1]], "Input Dims": [[5632, 65536], [65536, 2048], [5632, 2048]], "Ev Idx": 4692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368934163.449, "dur": 77.123, + "args": { + "External id": 298070,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368934272.940, "dur": 26.897, + "args": { + "External id": 298071,"kernel_hash": "c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/7d/c7d6ikm23maj7cij2sp6c2oloe5wj3zanm4rfia7egexs4c2tcj4.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], []], "Ev Idx": 4694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368934319.606, "dur": 30.888, + "args": { + "External id": 298072,"kernel_hash": "c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "True", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/2w/c2wohqfnvne34c5tgakplavenwgioemcknjvmkyrvzt7etgjrw3e.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368934372.109, "dur": 18.432, + "args": { + "External id": 298073,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368934403.312, "dur": 15.397, + "args": { + "External id": 298074,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368934427.752, "dur": 31.675, + "args": { + "External id": 298075,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368934462.938, "dur": 32.909, + "args": { + "External id": 298076,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_backward", "pid": 2070552, "tid": 2107648, + "ts": 5333368934525.643, "dur": 216.233, + "args": { + "External id": 298077,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar", "long int"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [131072, 4096, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [1]], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 32, 4096], [16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [2]], "Ev Idx": 4700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368934601.195, "dur": 6.395, + "args": { + "External id": 298078,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368934609.603, "dur": 3.312, + "args": { + "External id": 298079,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368934780.307, "dur": 26.057, + "args": { + "External id": 298080,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368934816.709, "dur": 16.506, + "args": { + "External id": 298081,"kernel_hash": "c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "True"], "kernel_file": "/tmp/torchinductor_cvm/73/c73dzro777p6nmsvaosny7z2ke7c7rhjdm7v3mge6ahuwop7r33f.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368934841.748, "dur": 45.962, + "args": { + "External id": 298082,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368934893.810, "dur": 38.683, + "args": { + "External id": 298083,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368934938.832, "dur": 20.991, + "args": { + "External id": 298084,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368934965.563, "dur": 29.741, + "args": { + "External id": 298085,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368935002.104, "dur": 23.094, + "args": { + "External id": 298086,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 2048], [2048, 1], [2048, 1]], "Input Dims": [[2048, 65536], [65536, 2048], [2048, 2048]], "Ev Idx": 4709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2107648, + "ts": 5333368935032.234, "dur": 30.080, + "args": { + "External id": 298087,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_4", "pid": 2070552, "tid": 2107648, + "ts": 5333368935080.973, "dur": 20.447, + "args": { + "External id": 298088,"kernel_hash": "cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/po/cpotd6f2xzswg2k3qkmpicmz5wr56ayxp2zmtkv3wwxnmhvuh27c.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [65536, 2048], []], "Ev Idx": 4711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_bwd_kernel_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368935119.104, "dur": 24.017, + "args": { + "External id": 298089,"kernel_hash": "cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "65536", "2048", "1", "497", "132", "True", "2048", "False", "False", "True", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/vo/cvoehw665bnkqcgff62yie4uuv6owshktwgyem2i6fz3k53ghbfg.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [2048], [65536, 2048], [65536, 2048], [132, 2048], [65536], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_red_fused_sum_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368935160.864, "dur": 39.196, + "args": { + "External id": 298090,"kernel_hash": "cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i", "grid": "grid(4096,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "4096", "66"], "kernel_file": "/tmp/torchinductor_cvm/qt/cqttdwmxmi7xxlumw6knirurelva7lorx4f2cngskp4tndgmoa4i.py", "kernel_backend": "triton", "Input type": ["float", "float", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [4096, 1, 2048], [], []], "Input Dims": [[132, 2048], [1, 2048, 2], [], []], "Ev Idx": 4713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_per_fused__to_copy_sum_3", "pid": 2070552, "tid": 2107648, + "ts": 5333368935226.207, "dur": 21.018, + "args": { + "External id": 298091,"kernel_hash": "clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf", "grid": "grid(2048,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "2048", "2"], "kernel_file": "/tmp/torchinductor_cvm/lm/clmdqbjhukazby66uunhcudss4usuoysflfipfma5m7uabebuwuf.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 2048], [2048, 1], [], []], "Input Dims": [[1, 2048, 2], [1, 2048], [], []], "Ev Idx": 4714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_5", "pid": 2070552, "tid": 2107648, + "ts": 5333368935264.491, "dur": 21.896, + "args": { + "External id": 298092,"kernel_hash": "coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/oi/coid2ps756p6tjl6izbakhrlrbxcmypnxzbwd5e5d5zd5jjd3anl.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[65536, 2048], [65536, 2048], [16, 4096, 2048], []], "Ev Idx": 4715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368935364.721, "dur": 16.436, + "args": { + "External id": 298093,"Record function id": 0, "Ev Idx": 4716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368935368.140, "dur": 12.053, + "args": { + "External id": 298094,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368935372.586, "dur": 6.822, + "args": { + "External id": 298095,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368935374.134, "dur": 5.185, + "args": { + "External id": 298096,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368935384.703, "dur": 4.808, + "args": { + "External id": 298097,"Record function id": 0, "Ev Idx": 4720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368935386.144, "dur": 2.935, + "args": { + "External id": 298098,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368935386.751, "dur": 1.638, + "args": { + "External id": 298099,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368935387.106, "dur": 1.216, + "args": { + "External id": 298100,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368935392.741, "dur": 4.934, + "args": { + "External id": 298101,"Record function id": 0, "Ev Idx": 4724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368935394.520, "dur": 2.672, + "args": { + "External id": 298102,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368935395.172, "dur": 1.460, + "args": { + "External id": 298103,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368935395.662, "dur": 0.896, + "args": { + "External id": 298104,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[2048]], "Ev Idx": 4727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368935400.777, "dur": 4.506, + "args": { + "External id": 298105,"Record function id": 0, "Ev Idx": 4728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368935402.528, "dur": 2.363, + "args": { + "External id": 298106,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368935403.302, "dur": 1.170, + "args": { + "External id": 298107,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368935403.626, "dur": 0.760, + "args": { + "External id": 298108,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[5632, 1]], "Input Dims": [[2048, 5632]], "Ev Idx": 4731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368935408.264, "dur": 4.560, + "args": { + "External id": 298109,"Record function id": 0, "Ev Idx": 4732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368935409.651, "dur": 2.740, + "args": { + "External id": 298110,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368935410.338, "dur": 1.353, + "args": { + "External id": 298111,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368935410.659, "dur": 0.961, + "args": { + "External id": 298112,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368935415.906, "dur": 4.867, + "args": { + "External id": 298113,"Record function id": 0, "Ev Idx": 4736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368935417.568, "dur": 2.812, + "args": { + "External id": 298114,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368935418.084, "dur": 1.751, + "args": { + "External id": 298115,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368935418.639, "dur": 1.113, + "args": { + "External id": 298116,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368935423.877, "dur": 4.444, + "args": { + "External id": 298117,"Record function id": 0, "Ev Idx": 4740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368935425.236, "dur": 2.631, + "args": { + "External id": 298118,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368935426.150, "dur": 1.283, + "args": { + "External id": 298119,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368935426.646, "dur": 0.715, + "args": { + "External id": 298120,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368935431.524, "dur": 4.162, + "args": { + "External id": 298121,"Record function id": 0, "Ev Idx": 4744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368935432.918, "dur": 2.369, + "args": { + "External id": 298122,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368935433.449, "dur": 1.389, + "args": { + "External id": 298123,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368935433.932, "dur": 0.840, + "args": { + "External id": 298124,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[5632, 2048]], "Ev Idx": 4747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368935438.919, "dur": 5.007, + "args": { + "External id": 298125,"Record function id": 0, "Ev Idx": 4748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368935440.258, "dur": 3.256, + "args": { + "External id": 298126,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368935441.079, "dur": 1.587, + "args": { + "External id": 298127,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368935441.848, "dur": 0.743, + "args": { + "External id": 298128,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[2048, 2048]], "Ev Idx": 4751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368935447.552, "dur": 37246.971, + "args": { + "External id": 298129,"Record function id": 0, "Sequence number": 1209161, "Fwd thread id": 1, "Ev Idx": 4752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368935449.026, "dur": 37235.395, + "args": { + "External id": 298130,"Sequence number": 1209161, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4753 + } + }, + { + "ph": "f", "id": 71, "pid": 2070552, "tid": 2107648, "ts": 5333368935449.026, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate (model.layers.0)", "pid": 2070552, "tid": 2107648, + "ts": 5333368935479.139, "dur": 39.431, + "args": { + "External id": 298131,"Record function id": 0, "Ev Idx": 4754 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard (model.layers.0)", "pid": 2070552, "tid": 2107648, + "ts": 5333368935526.403, "dur": 133.218, + "args": { + "External id": 298132,"Record function id": 0, "Ev Idx": 4755 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce (model.layers.0)", "pid": 2070552, "tid": 2107648, + "ts": 5333368935673.992, "dur": 37000.543, + "args": { + "External id": 298133,"Record function id": 0, "Ev Idx": 4756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368935776.460, "dur": 7.909, + "args": { + "External id": 298134,"Record function id": 0, "Concrete Inputs": ["[51384320]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368935794.378, "dur": 5.220, + "args": { + "External id": 298135,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368935813.515, "dur": 35996.913, + "args": { + "External id": 298136,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368935826.724, "dur": 35972.799, + "args": { + "External id": 298137,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[1], [2048, 1], [2048, 1], [2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], [], [], [6423040, 1]], "Input Dims": [[[2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], [], [], [8, 6423040]], "Ev Idx": 4760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368935877.228, "dur": 14.608, + "args": { + "External id": 298138,"Record function id": 0, "Concrete Inputs": ["[12602]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368935897.871, "dur": 35863.116, + "args": { + "External id": 298139,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], [], []], "Ev Idx": 4762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368935900.554, "dur": 35859.705, + "args": { + "External id": 298140,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[12602], [], [], [], [], [], []], "Ev Idx": 4763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368935904.354, "dur": 5.514, + "args": { + "External id": 298141,"Record function id": 0, "Concrete Inputs": ["[12602]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368935911.888, "dur": 35844.276, + "args": { + "External id": 298142,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[12602], [12602], []], "Ev Idx": 4765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368971905.962, "dur": 10.450, + "args": { + "External id": 298143,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[51384320], [], [], [], [], []], "Ev Idx": 4766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368971909.160, "dur": 6.895, + "args": { + "External id": 298144,"Record function id": 0, "Concrete Inputs": ["[6423040]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368971946.493, "dur": 388.928, + "args": { + "External id": 298145,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[6423040], [51384320], [], [], [], []], "Ev Idx": 4768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368971972.150, "dur": 357.884, + "args": { + "External id": 298146,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 6423040, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[51384320], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4769, "In msg nelems": 51384320 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368971982.598, "dur": 341.330, + "args": { + "External id": 298147,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[51384320]], "Ev Idx": 4770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368972356.859, "dur": 2.370, + "args": { + "External id": 298148,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4771, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368972424.219, "dur": 6.993, + "args": { + "External id": 298149,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368972475.861, "dur": 1.444, + "args": { + "External id": 298150,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368972493.142, "dur": 1.629, + "args": { + "External id": 298151,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "524544"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368972507.708, "dur": 1.255, + "args": { + "External id": 298152,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1048832"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368972521.945, "dur": 1.586, + "args": { + "External id": 298153,"Record function id": 0, "Concrete Inputs": ["", "[256, 2048]", "[2048, 1]", "1573120"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368972534.641, "dur": 1.077, + "args": { + "External id": 298154,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368972546.740, "dur": 1.267, + "args": { + "External id": 298155,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "2097664"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368972560.764, "dur": 1.701, + "args": { + "External id": 298156,"Record function id": 0, "Concrete Inputs": ["", "[704, 2048]", "[2048, 1]", "3539456"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368972573.061, "dur": 1.597, + "args": { + "External id": 298157,"Record function id": 0, "Concrete Inputs": ["", "[256, 5632]", "[5632, 1]", "4981248"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368972711.513, "dur": 272.550, + "args": { + "External id": 298158,"Record function id": 0, "Sequence number": 1209160, "Fwd thread id": 1, "Ev Idx": 4781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunctionBackward", "pid": 2070552, "tid": 2107648, + "ts": 5333368972714.191, "dur": 262.521, + "args": { + "External id": 298159,"Sequence number": 1209160, "Fwd thread id": 1, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4782 + } + }, + { + "ph": "f", "id": 72, "pid": 2070552, "tid": 2107648, "ts": 5333368972714.191, + "cat": "fwdbwd", "name": "fwdbwd", "bp": "e" + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_0", "pid": 2070552, "tid": 2107648, + "ts": 5333368972835.131, "dur": 49.722, + "args": { + "External id": 298160,"kernel_hash": "cwtajazwhbnqqeu43trk7x5hwtkpkc7brvmwcfo3dd47qg6wqw5o", "grid": "grid(65536000,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "65536000"], "kernel_file": "/tmp/torchinductor_cvm/wt/cwtajazwhbnqqeu43trk7x5hwtkpkc7brvmwcfo3dd47qg6wqw5o.py", "kernel_backend": "triton", "Input type": ["float", "Scalar"], "Input Strides": [[2048, 1], []], "Input Dims": [[32000, 2048], []], "Ev Idx": 4783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_1", "pid": 2070552, "tid": 2107648, + "ts": 5333368972898.604, "dur": 27.208, + "args": { + "External id": 298161,"kernel_hash": "cbqt3vkbsukl3ofnzpcsscs2yqya6hwss2ivwbwkcdyw6xw34uhg", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bq/cbqt3vkbsukl3ofnzpcsscs2yqya6hwss2ivwbwkcdyw6xw34uhg.py", "kernel_backend": "triton", "Input type": ["long int", "c10::BFloat16", "float", "Scalar"], "Input Strides": [[4096, 1], [8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096], [16, 4096, 2048], [32000, 2048], []], "Ev Idx": 4784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_dense_backward_2", "pid": 2070552, "tid": 2107648, + "ts": 5333368972940.648, "dur": 22.305, + "args": { + "External id": 298162,"kernel_hash": "cshlfxe4rl24cbt566rdpjcqclo3uwm54uyk4lptbb3krvpyopow", "grid": "grid(65536000,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "65536000"], "kernel_file": "/tmp/torchinductor_cvm/sh/cshlfxe4rl24cbt566rdpjcqclo3uwm54uyk4lptbb3krvpyopow.py", "kernel_backend": "triton", "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 4785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "autograd::engine::evaluate_function: torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368972992.804, "dur": 14.709, + "args": { + "External id": 298163,"Record function id": 0, "Ev Idx": 4786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "torch::autograd::AccumulateGrad", "pid": 2070552, "tid": 2107648, + "ts": 5333368972995.764, "dur": 10.614, + "args": { + "External id": 298164,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 4787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368972999.257, "dur": 6.192, + "args": { + "External id": 298165,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 4788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2107648, + "ts": 5333368973000.476, "dur": 4.839, + "args": { + "External id": 298166,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 4789 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::root_post_backward_callback", "pid": 2070552, "tid": 2107648, + "ts": 5333368973029.197, "dur": 10730.922, + "args": { + "External id": 298167,"Record function id": 0, "Ev Idx": 4790 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_accumulate", "pid": 2070552, "tid": 2107648, + "ts": 5333368973047.342, "dur": 21.745, + "args": { + "External id": 298168,"Record function id": 0, "Ev Idx": 4791 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reshard", "pid": 2070552, "tid": 2107648, + "ts": 5333368973074.585, "dur": 62.882, + "args": { + "External id": 298169,"Record function id": 0, "Ev Idx": 4792 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_backward_reduce", "pid": 2070552, "tid": 2107648, + "ts": 5333368973143.105, "dur": 10255.278, + "args": { + "External id": 298170,"Record function id": 0, "Ev Idx": 4793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368973244.447, "dur": 8.460, + "args": { + "External id": 298171,"Record function id": 0, "Concrete Inputs": ["[196610048]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2107648, + "ts": 5333368973265.174, "dur": 6.137, + "args": { + "External id": 298172,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[196610048], []], "Ev Idx": 4795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368973290.281, "dur": 9304.805, + "args": { + "External id": 298173,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[2048, 1], [1], [2048, 1], [2048, 1]], [], [], [24576256, 1]], "Input Dims": [[[32000, 2048], [2048], [32000, 2048], [32000, 2048]], [], [], [8, 24576256]], "Ev Idx": 4796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_chunk_cat", "pid": 2070552, "tid": 2107648, + "ts": 5333368973304.919, "dur": 9277.662, + "args": { + "External id": 298174,"Record function id": 0, "Concrete Inputs": ["", "0", "8", ""], "Input type": ["TensorList", "Scalar", "Scalar", "float"], "Input Strides": [[[2048, 1], [1], [2048, 1], [2048, 1]], [], [], [24576256, 1]], "Input Dims": [[[32000, 2048], [2048], [32000, 2048], [32000, 2048]], [], [], [8, 24576256]], "Ev Idx": 4797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368973468.888, "dur": 13.965, + "args": { + "External id": 298175,"Record function id": 0, "Concrete Inputs": ["[48027]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2107648, + "ts": 5333368973535.063, "dur": 9004.226, + "args": { + "External id": 298176,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[48027], [], [], [], [], [], [], []], "Ev Idx": 4799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2107648, + "ts": 5333368973537.864, "dur": 9000.355, + "args": { + "External id": 298177,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[48027], [], [], [], [], [], []], "Ev Idx": 4800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368973541.833, "dur": 8.136, + "args": { + "External id": 298178,"Record function id": 0, "Concrete Inputs": ["[48027]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2107648, + "ts": 5333368973551.859, "dur": 8981.339, + "args": { + "External id": 298179,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[48027], [48027], []], "Ev Idx": 4802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368982719.951, "dur": 11.942, + "args": { + "External id": 298180,"Record function id": 0, "Concrete Inputs": ["", "[24576256]", "", "", "", "False"], "Input type": ["float", "ScalarList", "", "", "", "Scalar"], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[196610048], [], [], [], [], []], "Ev Idx": 4803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2107648, + "ts": 5333368982723.249, "dur": 8.143, + "args": { + "External id": 298181,"Record function id": 0, "Concrete Inputs": ["[24576256]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_reduce_scatter_base_", "pid": 2070552, "tid": 2107648, + "ts": 5333368982761.405, "dur": 369.212, + "args": { + "External id": 298182,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "-1"], "Input type": ["float", "float", "", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], [], []], "Input Dims": [[24576256], [196610048], [], [], [], []], "Ev Idx": 4805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368982786.281, "dur": 339.089, + "args": { + "External id": 298183,"Record function id": 0, "Collective name": "_reduce_scatter_base", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 24576256, "Process Group Name": "0", "Input type": ["float", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[196610048], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4806, "In msg nelems": 196610048 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_reduce_scatter_base", "pid": 2070552, "tid": 2107648, + "ts": 5333368982797.024, "dur": 323.098, + "args": { + "External id": 298184,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[196610048]], "Ev Idx": 4807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2107648, + "ts": 5333368983148.545, "dur": 1.911, + "args": { + "External id": 298185,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4808, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368983236.136, "dur": 7.890, + "args": { + "External id": 298186,"Record function id": 0, "Concrete Inputs": ["", "[4000, 2048]", "[2048, 1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368983297.004, "dur": 1.991, + "args": { + "External id": 298187,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "8192000"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368983316.081, "dur": 1.947, + "args": { + "External id": 298188,"Record function id": 0, "Concrete Inputs": ["", "[4000, 2048]", "[2048, 1]", "8192256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2107648, + "ts": 5333368983331.452, "dur": 1.558, + "args": { + "External id": 298189,"Record function id": 0, "Concrete Inputs": ["", "[4000, 2048]", "[2048, 1]", "16384256"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4812 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "ProfilerStep#14847", "pid": 2070552, "tid": 2070552, + "ts": 5333366952398.772, "dur": 2048324.914, + "args": { + "External id": 289281,"Record function id": 0, "Ev Idx": 4813 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "Optimizer.zero_grad#AdamW.zero_grad", "pid": 2070552, "tid": 2070552, + "ts": 5333366952429.425, "dur": 606.182, + "args": { + "External id": 289282,"Record function id": 0, "Ev Idx": 4814 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "enumerate(DataLoader)#_StatefulMultiProcessingDataLoaderIter.__next__", "pid": 2070552, "tid": 2070552, + "ts": 5333366953074.890, "dur": 1953.679, + "args": { + "External id": 289283,"Record function id": 0, "Ev Idx": 4815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333366954015.920, "dur": 7.279, + "args": { + "External id": 289284,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::set_", "pid": 2070552, "tid": 2070552, + "ts": 5333366954041.761, "dur": 5.803, + "args": { + "External id": 289285,"Sequence number": 1209160, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "0", "[16, 8192]", "[8192, 1]"], "Input type": ["long int", "", "Scalar", "ScalarList", "ScalarList"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[0], [], [], [], []], "Ev Idx": 4817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333366954477.543, "dur": 2.593, + "args": { + "External id": 289286,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::set_", "pid": 2070552, "tid": 2070552, + "ts": 5333366954489.402, "dur": 2.408, + "args": { + "External id": 289287,"Sequence number": 1209160, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "0", "[16, 8192]", "[8192, 1]"], "Input type": ["long int", "", "Scalar", "ScalarList", "ScalarList"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[0], [], [], [], []], "Ev Idx": 4819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333366954925.725, "dur": 1.635, + "args": { + "External id": 289288,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::set_", "pid": 2070552, "tid": 2070552, + "ts": 5333366954931.932, "dur": 1.975, + "args": { + "External id": 289289,"Sequence number": 1209160, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "0", "[16, 8192]", "[8192, 1]"], "Input type": ["long int", "", "Scalar", "ScalarList", "ScalarList"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[0], [], [], [], []], "Ev Idx": 4821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333366955785.865, "dur": 16.393, + "args": { + "External id": 289290,"Sequence number": 1209160, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], []], "Ev Idx": 4822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366955796.265, "dur": 2.404, + "args": { + "External id": 289291,"Record function id": 0, "Concrete Inputs": ["", "[16, 8192]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 8192], [], [], []], "Ev Idx": 4823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333366955803.758, "dur": 3.948, + "args": { + "External id": 289292,"Sequence number": 1209160, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], []], "Ev Idx": 4824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366955805.581, "dur": 0.934, + "args": { + "External id": 289293,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 8192], [], [], []], "Ev Idx": 4825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333366955834.102, "dur": 535.465, + "args": { + "External id": 289294,"Sequence number": 1209160, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], [], [], [], []], "Ev Idx": 4826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333366955841.328, "dur": 527.532, + "args": { + "External id": 289295,"Sequence number": 1209160, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], [], [], []], "Ev Idx": 4827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366955850.772, "dur": 9.261, + "args": { + "External id": 289296,"Record function id": 0, "Concrete Inputs": ["[16, 4096]", "[4096, 1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333366955862.549, "dur": 504.429, + "args": { + "External id": 289297,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[16, 4096], [16, 4096], []], "Ev Idx": 4829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333366955872.467, "dur": 0.691, + "args": { + "External id": 289298,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], []], "Ev Idx": 4830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand_as", "pid": 2070552, "tid": 2070552, + "ts": 5333366955875.491, "dur": 6.259, + "args": { + "External id": 289299,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["long int", "long int"], "Input Strides": [[8192, 1], [4096, 1]], "Input Dims": [[16, 4096], [16, 4096]], "Ev Idx": 4831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 2070552, "tid": 2070552, + "ts": 5333366955878.379, "dur": 3.233, + "args": { + "External id": 289300,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], []], "Input Dims": [[16, 4096], [], []], "Ev Idx": 4832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366955880.779, "dur": 0.589, + "args": { + "External id": 289301,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "[8192, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 4096], [], [], []], "Ev Idx": 4833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2070552, "tid": 2070552, + "ts": 5333366955883.623, "dur": 175.033, + "args": { + "External id": 289302,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 4834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2070552, "tid": 2070552, + "ts": 5333366955886.156, "dur": 172.099, + "args": { + "External id": 289303,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 4835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333366955888.628, "dur": 16.808, + "args": { + "External id": 289304,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], [], []], "Ev Idx": 4836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333366955892.983, "dur": 11.965, + "args": { + "External id": 289305,"Record function id": 0, "Concrete Inputs": ["[16, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333366955906.494, "dur": 151.247, + "args": { + "External id": 289306,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[16, 4096], [16, 4096], []], "Ev Idx": 4838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333366956060.504, "dur": 302.381, + "args": { + "External id": 289307,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[16, 4096], [16, 4096], []], "Ev Idx": 4839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333366956387.112, "dur": 541.127, + "args": { + "External id": 289308,"Sequence number": 1209160, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], [], [], [], []], "Ev Idx": 4840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333366956388.776, "dur": 538.445, + "args": { + "External id": 289309,"Sequence number": 1209160, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], [], [], []], "Ev Idx": 4841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366956396.616, "dur": 9.557, + "args": { + "External id": 289310,"Record function id": 0, "Concrete Inputs": ["[16, 8192]", "[8192, 1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333366956409.339, "dur": 513.886, + "args": { + "External id": 289311,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[8192, 1], [8192, 1], []], "Input Dims": [[16, 8192], [16, 8192], []], "Ev Idx": 4843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 2070552, "tid": 2070552, + "ts": 5333366956959.367, "dur": 53.601, + "args": { + "External id": 289312,"Record function id": 0, "Concrete Inputs": ["0", "4096", "", "", "", "False"], "Input type": ["Scalar", "Scalar", "", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333366956965.321, "dur": 5.987, + "args": { + "External id": 289313,"Record function id": 0, "Concrete Inputs": ["[0]", "4", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::arange", "pid": 2070552, "tid": 2070552, + "ts": 5333366956973.241, "dur": 39.361, + "args": { + "External id": 289314,"Record function id": 0, "Concrete Inputs": ["0", "4096", "1", ""], "Input type": ["Scalar", "Scalar", "Scalar", "long int"], "Input Strides": [[], [], [], [1]], "Input Dims": [[], [], [], [0]], "Ev Idx": 4846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333366956978.022, "dur": 7.649, + "args": { + "External id": 289315,"Record function id": 0, "Concrete Inputs": ["", "[4096]", ""], "Input type": ["long int", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 4847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::repeat", "pid": 2070552, "tid": 2070552, + "ts": 5333366957024.279, "dur": 70.320, + "args": { + "External id": 289316,"Sequence number": 1209160, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[16, 1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4096], []], "Ev Idx": 4848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 2070552, "tid": 2070552, + "ts": 5333366957031.862, "dur": 6.218, + "args": { + "External id": 289317,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[4096], [], []], "Ev Idx": 4849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366957035.991, "dur": 1.869, + "args": { + "External id": 289318,"Record function id": 0, "Concrete Inputs": ["", "[1, 4096]", "[4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[4096], [], [], []], "Ev Idx": 4850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333366957039.021, "dur": 4.496, + "args": { + "External id": 289319,"Record function id": 0, "Concrete Inputs": ["[16, 4096]", "4", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::alias", "pid": 2070552, "tid": 2070552, + "ts": 5333366957046.078, "dur": 3.119, + "args": { + "External id": 289320,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[4096, 1]], "Input Dims": [[16, 4096]], "Ev Idx": 4852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 2070552, "tid": 2070552, + "ts": 5333366957052.031, "dur": 5.884, + "args": { + "External id": 289321,"Record function id": 0, "Concrete Inputs": ["", "0", "1", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[16, 4096], [], [], []], "Ev Idx": 4853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366957057.164, "dur": 0.588, + "args": { + "External id": 289322,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 1]", "[4096, 1, 4096]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[16, 4096], [], [], []], "Ev Idx": 4854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unfold", "pid": 2070552, "tid": 2070552, + "ts": 5333366957058.742, "dur": 3.711, + "args": { + "External id": 289323,"Record function id": 0, "Concrete Inputs": ["", "1", "4096", "4096"], "Input type": ["long int", "Scalar", "Scalar", "Scalar"], "Input Strides": [[4096, 1, 4096], [], [], []], "Input Dims": [[16, 4096, 1], [], [], []], "Ev Idx": 4855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366957061.677, "dur": 0.691, + "args": { + "External id": 289324,"Record function id": 0, "Concrete Inputs": ["", "[16, 1, 1, 4096]", "[4096, 4096, 4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1, 4096], [], [], []], "Input Dims": [[16, 4096, 1], [], [], []], "Ev Idx": 4856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand_as", "pid": 2070552, "tid": 2070552, + "ts": 5333366957064.117, "dur": 5.033, + "args": { + "External id": 289325,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["long int", "long int"], "Input Strides": [[4096, 1], [4096, 4096, 4096, 1]], "Input Dims": [[1, 4096], [16, 1, 1, 4096]], "Ev Idx": 4857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::expand", "pid": 2070552, "tid": 2070552, + "ts": 5333366957067.090, "dur": 1.958, + "args": { + "External id": 289326,"Record function id": 0, "Concrete Inputs": ["", "[16, 1, 1, 4096]", "False"], "Input type": ["long int", "ScalarList", "Scalar"], "Input Strides": [[4096, 1], [], []], "Input Dims": [[1, 4096], [], []], "Ev Idx": 4858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366957068.150, "dur": 0.807, + "args": { + "External id": 289327,"Record function id": 0, "Concrete Inputs": ["", "[16, 1, 1, 4096]", "[0, 4096, 4096, 1]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[4096, 1], [], [], []], "Input Dims": [[1, 4096], [], [], []], "Ev Idx": 4859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333366957070.257, "dur": 23.598, + "args": { + "External id": 289328,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 4096, 4096, 1], [0, 4096, 4096, 1], []], "Input Dims": [[16, 1, 1, 4096], [16, 1, 1, 4096], []], "Ev Idx": 4860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333366957104.105, "dur": 25.972, + "args": { + "External id": 289329,"Sequence number": 1209160, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "3", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], []], "Ev Idx": 4861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333366957105.581, "dur": 24.317, + "args": { + "External id": 289330,"Sequence number": 1209160, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "3", "", "", "", "False", ""], "Input type": ["long int", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[4096, 1], [], [], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], [], [], []], "Ev Idx": 4862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366957110.041, "dur": 3.061, + "args": { + "External id": 289331,"Record function id": 0, "Concrete Inputs": ["[16, 4096]", "[4096, 1]", "3", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333366957113.904, "dur": 15.599, + "args": { + "External id": 289332,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["int", "long int", "Scalar"], "Input Strides": [[4096, 1], [4096, 1], []], "Input Dims": [[16, 4096], [16, 4096], []], "Ev Idx": 4864 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::root_pre_forward", "pid": 2070552, "tid": 2070552, + "ts": 5333366957259.482, "dur": 147.155, + "args": { + "External id": 289333,"Record function id": 0, "Ev Idx": 4865 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::inputs_to_device", "pid": 2070552, "tid": 2070552, + "ts": 5333366957344.114, "dur": 52.563, + "args": { + "External id": 289334,"Record function id": 0, "Ev Idx": 4866 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5333366957414.030, "dur": 41.057, + "args": { + "External id": 289335,"Record function id": 0, "Ev Idx": 4867 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward", "pid": 2070552, "tid": 2070552, + "ts": 5333366957463.332, "dur": 7981.506, + "args": { + "External id": 289336,"Record function id": 0, "Ev Idx": 4868 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather", "pid": 2070552, "tid": 2070552, + "ts": 5333366957472.717, "dur": 805.050, + "args": { + "External id": 289337,"Record function id": 0, "Ev Idx": 4869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333366957540.797, "dur": 7.291, + "args": { + "External id": 289338,"Record function id": 0, "Concrete Inputs": ["[24576256]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333366957562.825, "dur": 16.912, + "args": { + "External id": 289339,"Record function id": 0, "Concrete Inputs": ["", "[8192000, 256, 8192000, 8192000]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[24576256], [], []], "Ev Idx": 4871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366957566.633, "dur": 1.638, + "args": { + "External id": 289340,"Record function id": 0, "Concrete Inputs": ["", "[8192000]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366957573.423, "dur": 0.245, + "args": { + "External id": 289341,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "8192000"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366957574.460, "dur": 0.441, + "args": { + "External id": 289342,"Record function id": 0, "Concrete Inputs": ["", "[8192000]", "[1]", "8192256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366957575.660, "dur": 0.418, + "args": { + "External id": 289343,"Record function id": 0, "Concrete Inputs": ["", "[8192000]", "[1]", "16384256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333366957592.666, "dur": 87.833, + "args": { + "External id": 289344,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1]], [[1], [1], [1], [1]], []], "Input Dims": [[[8192000], [256], [8192000], [8192000]], [[8192000], [256], [8192000], [8192000]], []], "Ev Idx": 4876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5333366957715.724, "dur": 105.209, + "args": { + "External id": 289345,"Record function id": 0, "Concrete Inputs": ["", "[8192000, 256, 8192000, 8192000]", "24576256", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[8192000], [256], [8192000], [8192000]], [], [], [], [], [], []], "Ev Idx": 4877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333366957729.594, "dur": 4.971, + "args": { + "External id": 289346,"Record function id": 0, "Concrete Inputs": ["[196610048]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5333366957740.641, "dur": 11.421, + "args": { + "External id": 289347,"Record function id": 0, "Concrete Inputs": ["", "0", "122881280", "24576256"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[196610048], [], [], []], "Ev Idx": 4879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333366957744.918, "dur": 6.593, + "args": { + "External id": 289348,"Record function id": 0, "Concrete Inputs": ["", "0", "122881280", "147457536", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[196610048], [], [], [], []], "Ev Idx": 4880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366957749.204, "dur": 0.553, + "args": { + "External id": 289349,"Record function id": 0, "Concrete Inputs": ["", "[24576256]", "[1]", "122881280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[196610048], [], [], []], "Ev Idx": 4881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333366957759.015, "dur": 11.187, + "args": { + "External id": 289350,"Record function id": 0, "Concrete Inputs": ["", "[8192000, 256, 8192000, 8192000]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[24576256], [], []], "Ev Idx": 4882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366957760.550, "dur": 0.471, + "args": { + "External id": 289351,"Record function id": 0, "Concrete Inputs": ["", "[8192000]", "[1]", "122881280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366957765.098, "dur": 0.224, + "args": { + "External id": 289352,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "131073280"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366957765.985, "dur": 0.443, + "args": { + "External id": 289353,"Record function id": 0, "Concrete Inputs": ["", "[8192000]", "[1]", "131073536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366957766.996, "dur": 0.366, + "args": { + "External id": 289354,"Record function id": 0, "Concrete Inputs": ["", "[8192000]", "[1]", "139265536"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[24576256], [], [], []], "Ev Idx": 4886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333366957780.934, "dur": 32.629, + "args": { + "External id": 289355,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1]], [[1], [1], [1], [1]], []], "Input Dims": [[[8192000], [256], [8192000], [8192000]], [[8192000], [256], [8192000], [8192000]], []], "Ev Idx": 4887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5333366957871.532, "dur": 291.209, + "args": { + "External id": 289356,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[196610048], [24576256], [], [], []], "Ev Idx": 4888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333366957904.426, "dur": 253.631, + "args": { + "External id": 289357,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 196610048, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[24576256], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4889, "In msg nelems": 24576256 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5333366957916.138, "dur": 236.201, + "args": { + "External id": 289358,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[24576256]], "Ev Idx": 4890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333366958208.298, "dur": 3.372, + "args": { + "External id": 289359,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4891, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out", "pid": 2070552, "tid": 2070552, + "ts": 5333366958292.629, "dur": 7049.558, + "args": { + "External id": 289360,"Record function id": 0, "Ev Idx": 4892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333366958375.039, "dur": 6.200, + "args": { + "External id": 289361,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[196610048], []], "Ev Idx": 4893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333366958384.546, "dur": 1.092, + "args": { + "External id": 289362,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[65536000], []], "Ev Idx": 4894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333366958387.179, "dur": 2.297, + "args": { + "External id": 289363,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333366958391.326, "dur": 1.117, + "args": { + "External id": 289364,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[65536000], []], "Ev Idx": 4896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333366958393.704, "dur": 0.659, + "args": { + "External id": 289365,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[65536000], []], "Ev Idx": 4897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333366958412.781, "dur": 6887.118, + "args": { + "External id": 289366,"Record function id": 0, "Concrete Inputs": ["", "[8192000, 256, 8192000, 8192000]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[24576256, 1], [], [], [[8192000, 1], [256, 1], [8192000, 1], [8192000, 1]]], "Input Dims": [[8, 24576256], [], [], [[8, 8192000], [8, 256], [8, 8192000], [8, 8192000]]], "Ev Idx": 4898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333366958429.438, "dur": 6862.813, + "args": { + "External id": 289367,"Record function id": 0, "Concrete Inputs": ["", "[8192000, 256, 8192000, 8192000]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[24576256, 1], [], [], [[8192000, 1], [256, 1], [8192000, 1], [8192000, 1]]], "Input Dims": [[8, 24576256], [], [], [[8, 8192000], [8, 256], [8, 8192000], [8, 8192000]]], "Ev Idx": 4899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333366958445.928, "dur": 5.581, + "args": { + "External id": 289368,"Record function id": 0, "Concrete Inputs": ["[3447]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333366958458.343, "dur": 6799.034, + "args": { + "External id": 289369,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[3447], [], [], [], [], [], [], []], "Ev Idx": 4901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333366958460.659, "dur": 6796.103, + "args": { + "External id": 289370,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[3447], [], [], [], [], [], []], "Ev Idx": 4902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366958466.029, "dur": 5.118, + "args": { + "External id": 289371,"Record function id": 0, "Concrete Inputs": ["[3447]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333366958472.775, "dur": 6780.741, + "args": { + "External id": 289372,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[3447], [3447], []], "Ev Idx": 4904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5333366965520.598, "dur": 26.074, + "args": { + "External id": 289373,"Record function id": 0, "Ev Idx": 4905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 0/0", "pid": 2070552, "tid": 2070552, + "ts": 5333366965548.031, "dur": 242.583, + "args": { + "External id": 289374,"Record function id": 0, "Ev Idx": 4906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333366965589.196, "dur": 192.044, + "args": { + "External id": 289375,"Sequence number": 1209160, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "long int"], "Input Strides": [[2048, 1], [4096, 1]], "Input Dims": [[32000, 2048], [16, 4096]], "Ev Idx": 4907 + } + }, + { + "ph": "s", "id": 72, "pid": 2070552, "tid": 2070552, "ts": 5333366965589.196, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_embedding_0", "pid": 2070552, "tid": 2070552, + "ts": 5333366965691.665, "dur": 52.003, + "args": { + "External id": 289376,"kernel_hash": "csqogzggybapwolkqtroddnjtkp7ckvlrazmt5khcreabxycjcwc", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/sq/csqogzggybapwolkqtroddnjtkp7ckvlrazmt5khcreabxycjcwc.py", "kernel_backend": "triton", "Input type": ["long int", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[4096, 1], [2048, 1], [8388608, 2048, 1], []], "Input Dims": [[16, 4096], [32000, 2048], [16, 4096, 2048], []], "Ev Idx": 4908 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5333366965851.703, "dur": 58.449, + "args": { + "External id": 289377,"Record function id": 0, "Ev Idx": 4909 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.0)", "pid": 2070552, "tid": 2070552, + "ts": 5333366965920.694, "dur": 7183.707, + "args": { + "External id": 289378,"Record function id": 0, "Ev Idx": 4910 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.0)", "pid": 2070552, "tid": 2070552, + "ts": 5333366965927.829, "dur": 839.422, + "args": { + "External id": 289379,"Record function id": 0, "Ev Idx": 4911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333366966004.083, "dur": 9.453, + "args": { + "External id": 289380,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333366966027.543, "dur": 39.241, + "args": { + "External id": 289381,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366966037.279, "dur": 2.316, + "args": { + "External id": 289382,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366966041.395, "dur": 1.889, + "args": { + "External id": 289383,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366966043.909, "dur": 2.390, + "args": { + "External id": 289384,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366966049.310, "dur": 0.410, + "args": { + "External id": 289385,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366966050.355, "dur": 0.298, + "args": { + "External id": 289386,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366966054.807, "dur": 0.562, + "args": { + "External id": 289387,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366966056.071, "dur": 0.486, + "args": { + "External id": 289388,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366966057.204, "dur": 0.159, + "args": { + "External id": 289389,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366966060.936, "dur": 0.162, + "args": { + "External id": 289390,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333366966077.102, "dur": 35.144, + "args": { + "External id": 289391,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5333366966143.968, "dur": 128.988, + "args": { + "External id": 289392,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 4924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333366966154.661, "dur": 4.472, + "args": { + "External id": 289393,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5333366966164.219, "dur": 29.721, + "args": { + "External id": 289394,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333366966186.000, "dur": 7.501, + "args": { + "External id": 289395,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 4927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366966190.851, "dur": 0.950, + "args": { + "External id": 289396,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 4928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333366966201.660, "dur": 24.818, + "args": { + "External id": 289397,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366966203.137, "dur": 0.369, + "args": { + "External id": 289398,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366966206.126, "dur": 0.338, + "args": { + "External id": 289399,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366966209.261, "dur": 0.321, + "args": { + "External id": 289400,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366966211.562, "dur": 0.299, + "args": { + "External id": 289401,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366966212.418, "dur": 2.376, + "args": { + "External id": 289402,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366966215.519, "dur": 0.385, + "args": { + "External id": 289403,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366966217.767, "dur": 1.186, + "args": { + "External id": 289404,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366966221.540, "dur": 0.185, + "args": { + "External id": 289405,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366966222.395, "dur": 0.304, + "args": { + "External id": 289406,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333366966238.331, "dur": 24.678, + "args": { + "External id": 289407,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 4939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5333366966325.253, "dur": 345.546, + "args": { + "External id": 289408,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 4940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333366966352.042, "dur": 313.715, + "args": { + "External id": 289409,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 4941, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5333366966361.299, "dur": 296.806, + "args": { + "External id": 289410,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 4942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333366966696.853, "dur": 2.732, + "args": { + "External id": 289411,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 4943, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.0)", "pid": 2070552, "tid": 2070552, + "ts": 5333366966789.655, "dur": 6113.711, + "args": { + "External id": 289412,"Record function id": 0, "Ev Idx": 4944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333366966890.002, "dur": 6.357, + "args": { + "External id": 289413,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 4945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333366966899.941, "dur": 0.956, + "args": { + "External id": 289414,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333366966902.395, "dur": 1.082, + "args": { + "External id": 289415,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333366966905.177, "dur": 0.803, + "args": { + "External id": 289416,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333366966907.338, "dur": 0.661, + "args": { + "External id": 289417,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333366966909.268, "dur": 2.113, + "args": { + "External id": 289418,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 4950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333366966912.815, "dur": 0.971, + "args": { + "External id": 289419,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 4951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333366966917.794, "dur": 2.060, + "args": { + "External id": 289420,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333366966921.159, "dur": 0.686, + "args": { + "External id": 289421,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333366966923.376, "dur": 0.801, + "args": { + "External id": 289422,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 4954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333366966941.252, "dur": 5918.443, + "args": { + "External id": 289423,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333366966957.487, "dur": 5894.114, + "args": { + "External id": 289424,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 4956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333366966980.648, "dur": 13.734, + "args": { + "External id": 289425,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333366966998.319, "dur": 5819.872, + "args": { + "External id": 289426,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 4958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333366967000.812, "dur": 5816.719, + "args": { + "External id": 289427,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 4959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366967006.031, "dur": 6.135, + "args": { + "External id": 289428,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333366967013.905, "dur": 5800.816, + "args": { + "External id": 289429,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 4961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333366973050.082, "dur": 30.258, + "args": { + "External id": 289430,"Sequence number": 1209161, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 4962 + } + }, + { + "ph": "s", "id": 71, "pid": 2070552, "tid": 2070552, "ts": 5333366973050.082, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5333366973066.037, "dur": 9.438, + "args": { + "External id": 289431,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 4963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333366973070.573, "dur": 4.628, + "args": { + "External id": 289432,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 4964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5333366973142.889, "dur": 115.771, + "args": { + "External id": 289433,"Record function id": 0, "Ev Idx": 4965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5333366973261.406, "dur": 1137.548, + "args": { + "External id": 289434,"Record function id": 0, "Ev Idx": 4966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333366973300.882, "dur": 1083.682, + "args": { + "External id": 289435,"Sequence number": 1209162, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 4967 + } + }, + { + "ph": "s", "id": 70, "pid": 2070552, "tid": 2070552, "ts": 5333366973300.882, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5333366973371.865, "dur": 46.127, + "args": { + "External id": 289436,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 4968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333366973431.052, "dur": 106.932, + "args": { + "External id": 289437,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333366973546.831, "dur": 38.002, + "args": { + "External id": 289438,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333366973594.147, "dur": 90.426, + "args": { + "External id": 289439,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333366973716.323, "dur": 28.328, + "args": { + "External id": 289440,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333366973764.776, "dur": 15.761, + "args": { + "External id": 289441,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 4973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5333366973798.941, "dur": 125.948, + "args": { + "External id": 289442,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 4974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333366973850.412, "dur": 10.297, + "args": { + "External id": 289443,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 4975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366973854.991, "dur": 4.866, + "args": { + "External id": 289444,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333366973862.960, "dur": 3.324, + "args": { + "External id": 289445,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333366973868.757, "dur": 0.921, + "args": { + "External id": 289446,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333366973871.822, "dur": 2.508, + "args": { + "External id": 289447,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333366973936.068, "dur": 51.298, + "args": { + "External id": 289448,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 4980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5333366974017.930, "dur": 29.007, + "args": { + "External id": 289449,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 4981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333366974055.508, "dur": 41.413, + "args": { + "External id": 289450,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 4982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333366974105.331, "dur": 34.910, + "args": { + "External id": 289451,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 4983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5333366974162.608, "dur": 55.189, + "args": { + "External id": 289452,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 4984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333366974225.517, "dur": 37.130, + "args": { + "External id": 289453,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 4985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5333366974289.908, "dur": 19.672, + "args": { + "External id": 289454,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 4986 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.0)", "pid": 2070552, "tid": 2070552, + "ts": 5333366974462.321, "dur": 82.764, + "args": { + "External id": 289455,"Record function id": 0, "Ev Idx": 4987 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5333366974616.419, "dur": 105.493, + "args": { + "External id": 289456,"Record function id": 0, "Ev Idx": 4988 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.1)", "pid": 2070552, "tid": 2070552, + "ts": 5333366974733.351, "dur": 18998.524, + "args": { + "External id": 289457,"Record function id": 0, "Ev Idx": 4989 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.1)", "pid": 2070552, "tid": 2070552, + "ts": 5333366974743.596, "dur": 938.347, + "args": { + "External id": 289458,"Record function id": 0, "Ev Idx": 4990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333366974830.156, "dur": 9.642, + "args": { + "External id": 289459,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 4991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333366974854.166, "dur": 37.405, + "args": { + "External id": 289460,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 4992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366974860.848, "dur": 3.779, + "args": { + "External id": 289461,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366974868.704, "dur": 0.449, + "args": { + "External id": 289462,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366974870.789, "dur": 0.215, + "args": { + "External id": 289463,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366974873.596, "dur": 0.313, + "args": { + "External id": 289464,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366974874.556, "dur": 0.423, + "args": { + "External id": 289465,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366974876.548, "dur": 2.623, + "args": { + "External id": 289466,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366974879.823, "dur": 1.529, + "args": { + "External id": 289467,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 4999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366974881.922, "dur": 0.273, + "args": { + "External id": 289468,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366974885.637, "dur": 0.168, + "args": { + "External id": 289469,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333366974902.960, "dur": 44.767, + "args": { + "External id": 289470,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5333366974980.278, "dur": 110.410, + "args": { + "External id": 289471,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333366974992.319, "dur": 3.863, + "args": { + "External id": 289472,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5333366975001.440, "dur": 9.914, + "args": { + "External id": 289473,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333366975005.984, "dur": 4.908, + "args": { + "External id": 289474,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366975009.133, "dur": 0.453, + "args": { + "External id": 289475,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333366975017.395, "dur": 27.310, + "args": { + "External id": 289476,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366975020.495, "dur": 0.302, + "args": { + "External id": 289477,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366975021.853, "dur": 0.281, + "args": { + "External id": 289478,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366975024.659, "dur": 0.186, + "args": { + "External id": 289479,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366975027.132, "dur": 1.307, + "args": { + "External id": 289480,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366975029.154, "dur": 0.338, + "args": { + "External id": 289481,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366975032.306, "dur": 0.366, + "args": { + "External id": 289482,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366975034.811, "dur": 0.540, + "args": { + "External id": 289483,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366975035.952, "dur": 2.496, + "args": { + "External id": 289484,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366975040.294, "dur": 0.166, + "args": { + "External id": 289485,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333366975058.490, "dur": 24.137, + "args": { + "External id": 289486,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5333366975140.335, "dur": 404.039, + "args": { + "External id": 289487,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333366975214.907, "dur": 324.162, + "args": { + "External id": 289488,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5020, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5333366975225.850, "dur": 306.859, + "args": { + "External id": 289489,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333366975568.577, "dur": 2.507, + "args": { + "External id": 289490,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5022, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.1)", "pid": 2070552, "tid": 2070552, + "ts": 5333366975704.787, "dur": 17797.749, + "args": { + "External id": 289491,"Record function id": 0, "Ev Idx": 5023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333366975811.544, "dur": 6.709, + "args": { + "External id": 289492,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333366975821.893, "dur": 0.921, + "args": { + "External id": 289493,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333366975824.557, "dur": 1.935, + "args": { + "External id": 289494,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333366975828.282, "dur": 0.794, + "args": { + "External id": 289495,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333366975830.406, "dur": 0.850, + "args": { + "External id": 289496,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333366975834.723, "dur": 0.974, + "args": { + "External id": 289497,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333366975837.173, "dur": 0.878, + "args": { + "External id": 289498,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333366975839.619, "dur": 1.890, + "args": { + "External id": 289499,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333366975843.161, "dur": 0.777, + "args": { + "External id": 289500,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333366975847.047, "dur": 0.619, + "args": { + "External id": 289501,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333366975866.617, "dur": 17590.309, + "args": { + "External id": 289502,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333366975881.314, "dur": 17567.365, + "args": { + "External id": 289503,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333366975901.436, "dur": 14.657, + "args": { + "External id": 289504,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333366975920.157, "dur": 17491.964, + "args": { + "External id": 289505,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333366975922.874, "dur": 17488.515, + "args": { + "External id": 289506,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366975928.234, "dur": 5.579, + "args": { + "External id": 289507,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333366975935.510, "dur": 17472.380, + "args": { + "External id": 289508,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333366993674.127, "dur": 31.492, + "args": { + "External id": 289509,"Sequence number": 1209163, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5041 + } + }, + { + "ph": "s", "id": 69, "pid": 2070552, "tid": 2070552, "ts": 5333366993674.127, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5333366993691.697, "dur": 9.280, + "args": { + "External id": 289510,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333366993695.491, "dur": 5.143, + "args": { + "External id": 289511,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5333366993772.863, "dur": 82.787, + "args": { + "External id": 289512,"Record function id": 0, "Ev Idx": 5044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5333366993857.134, "dur": 1062.840, + "args": { + "External id": 289513,"Record function id": 0, "Ev Idx": 5045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333366993893.927, "dur": 1011.834, + "args": { + "External id": 289514,"Sequence number": 1209164, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5046 + } + }, + { + "ph": "s", "id": 68, "pid": 2070552, "tid": 2070552, "ts": 5333366993893.927, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5333366993961.822, "dur": 45.247, + "args": { + "External id": 289515,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333366994019.253, "dur": 106.464, + "args": { + "External id": 289516,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333366994134.578, "dur": 54.721, + "args": { + "External id": 289517,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333366994202.440, "dur": 33.818, + "args": { + "External id": 289518,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333366994261.324, "dur": 26.220, + "args": { + "External id": 289519,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333366994304.272, "dur": 13.918, + "args": { + "External id": 289520,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5333366994337.081, "dur": 123.122, + "args": { + "External id": 289521,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333366994385.401, "dur": 12.217, + "args": { + "External id": 289522,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366994390.932, "dur": 5.938, + "args": { + "External id": 289523,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333366994400.092, "dur": 4.580, + "args": { + "External id": 289524,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333366994405.739, "dur": 0.818, + "args": { + "External id": 289525,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333366994408.714, "dur": 2.618, + "args": { + "External id": 289526,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333366994469.727, "dur": 43.213, + "args": { + "External id": 289527,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5333366994543.511, "dur": 26.316, + "args": { + "External id": 289528,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333366994576.647, "dur": 39.703, + "args": { + "External id": 289529,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333366994665.950, "dur": 39.992, + "args": { + "External id": 289530,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5333366994731.451, "dur": 24.794, + "args": { + "External id": 289531,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333366994761.949, "dur": 33.308, + "args": { + "External id": 289532,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5333366994816.573, "dur": 17.855, + "args": { + "External id": 289533,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5065 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.1)", "pid": 2070552, "tid": 2070552, + "ts": 5333366994984.216, "dur": 81.127, + "args": { + "External id": 289534,"Record function id": 0, "Ev Idx": 5066 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5333366995139.675, "dur": 70.742, + "args": { + "External id": 289535,"Record function id": 0, "Ev Idx": 5067 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.2)", "pid": 2070552, "tid": 2070552, + "ts": 5333366995221.565, "dur": 18291.177, + "args": { + "External id": 289536,"Record function id": 0, "Ev Idx": 5068 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.2)", "pid": 2070552, "tid": 2070552, + "ts": 5333366995233.198, "dur": 817.709, + "args": { + "External id": 289537,"Record function id": 0, "Ev Idx": 5069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333366995318.519, "dur": 9.265, + "args": { + "External id": 289538,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333366995342.013, "dur": 34.835, + "args": { + "External id": 289539,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366995347.121, "dur": 2.476, + "args": { + "External id": 289540,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366995355.077, "dur": 0.268, + "args": { + "External id": 289541,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366995356.124, "dur": 0.329, + "args": { + "External id": 289542,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366995357.123, "dur": 0.341, + "args": { + "External id": 289543,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366995361.054, "dur": 1.405, + "args": { + "External id": 289544,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366995363.409, "dur": 0.591, + "args": { + "External id": 289545,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366995364.886, "dur": 1.955, + "args": { + "External id": 289546,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366995368.336, "dur": 0.524, + "args": { + "External id": 289547,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366995370.533, "dur": 0.361, + "args": { + "External id": 289548,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333366995387.577, "dur": 42.844, + "args": { + "External id": 289549,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5333366995463.533, "dur": 108.981, + "args": { + "External id": 289550,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333366995475.913, "dur": 3.848, + "args": { + "External id": 289551,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5333366995484.718, "dur": 10.929, + "args": { + "External id": 289552,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333366995489.270, "dur": 5.921, + "args": { + "External id": 289553,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366995493.325, "dur": 0.565, + "args": { + "External id": 289554,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333366995501.705, "dur": 29.088, + "args": { + "External id": 289555,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366995504.040, "dur": 2.603, + "args": { + "External id": 289556,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366995507.910, "dur": 1.394, + "args": { + "External id": 289557,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366995510.549, "dur": 0.459, + "args": { + "External id": 289558,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366995514.190, "dur": 0.230, + "args": { + "External id": 289559,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366995515.731, "dur": 0.336, + "args": { + "External id": 289560,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366995517.358, "dur": 0.371, + "args": { + "External id": 289561,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366995521.141, "dur": 0.479, + "args": { + "External id": 289562,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366995522.858, "dur": 0.392, + "args": { + "External id": 289563,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366995524.580, "dur": 2.418, + "args": { + "External id": 289564,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333366995541.007, "dur": 22.753, + "args": { + "External id": 289565,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5333366995666.547, "dur": 291.618, + "args": { + "External id": 289566,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333366995699.383, "dur": 254.338, + "args": { + "External id": 289567,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5099, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5333366995712.355, "dur": 235.693, + "args": { + "External id": 289568,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333366995979.540, "dur": 2.762, + "args": { + "External id": 289569,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5101, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.2)", "pid": 2070552, "tid": 2070552, + "ts": 5333366996071.289, "dur": 17233.664, + "args": { + "External id": 289570,"Record function id": 0, "Ev Idx": 5102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333366996192.731, "dur": 6.561, + "args": { + "External id": 289571,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333366996205.226, "dur": 0.944, + "args": { + "External id": 289572,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333366996208.280, "dur": 0.781, + "args": { + "External id": 289573,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333366996211.170, "dur": 0.822, + "args": { + "External id": 289574,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333366996213.413, "dur": 0.840, + "args": { + "External id": 289575,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333366996215.605, "dur": 0.913, + "args": { + "External id": 289576,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333366996219.928, "dur": 0.830, + "args": { + "External id": 289577,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333366996222.251, "dur": 2.720, + "args": { + "External id": 289578,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333366996226.779, "dur": 0.858, + "args": { + "External id": 289579,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333366996229.430, "dur": 0.871, + "args": { + "External id": 289580,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333366996250.982, "dur": 17001.581, + "args": { + "External id": 289581,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333366996267.054, "dur": 16977.549, + "args": { + "External id": 289582,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333366996286.958, "dur": 12.940, + "args": { + "External id": 289583,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333366996303.205, "dur": 16905.661, + "args": { + "External id": 289584,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333366996305.712, "dur": 16902.377, + "args": { + "External id": 289585,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333366996311.797, "dur": 5.541, + "args": { + "External id": 289586,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333366996319.052, "dur": 16885.629, + "args": { + "External id": 289587,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367013457.647, "dur": 30.003, + "args": { + "External id": 289588,"Sequence number": 1209165, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5120 + } + }, + { + "ph": "s", "id": 67, "pid": 2070552, "tid": 2070552, "ts": 5333367013457.647, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5333367013473.488, "dur": 9.503, + "args": { + "External id": 289589,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367013477.585, "dur": 5.101, + "args": { + "External id": 289590,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5333367013552.240, "dur": 118.978, + "args": { + "External id": 289591,"Record function id": 0, "Ev Idx": 5123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5333367013674.816, "dur": 1071.942, + "args": { + "External id": 289592,"Record function id": 0, "Ev Idx": 5124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367013716.428, "dur": 1016.602, + "args": { + "External id": 289593,"Sequence number": 1209166, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5125 + } + }, + { + "ph": "s", "id": 66, "pid": 2070552, "tid": 2070552, "ts": 5333367013716.428, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367013787.179, "dur": 46.820, + "args": { + "External id": 289594,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367013846.134, "dur": 102.750, + "args": { + "External id": 289595,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367013957.249, "dur": 37.636, + "args": { + "External id": 289596,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367014004.206, "dur": 30.516, + "args": { + "External id": 289597,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367014059.195, "dur": 25.590, + "args": { + "External id": 289598,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367014103.559, "dur": 14.364, + "args": { + "External id": 289599,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5333367014136.234, "dur": 148.653, + "args": { + "External id": 289600,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367014203.568, "dur": 12.863, + "args": { + "External id": 289601,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367014208.604, "dur": 6.843, + "args": { + "External id": 289602,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367014220.120, "dur": 5.551, + "args": { + "External id": 289603,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367014226.926, "dur": 0.971, + "args": { + "External id": 289604,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367014230.273, "dur": 2.599, + "args": { + "External id": 289605,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367014295.727, "dur": 50.049, + "args": { + "External id": 289606,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5333367014374.690, "dur": 28.626, + "args": { + "External id": 289607,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367014410.678, "dur": 39.793, + "args": { + "External id": 289608,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367014458.113, "dur": 35.369, + "args": { + "External id": 289609,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367014514.520, "dur": 24.908, + "args": { + "External id": 289610,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367014545.475, "dur": 33.461, + "args": { + "External id": 289611,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367014599.060, "dur": 18.819, + "args": { + "External id": 289612,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5144 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.2)", "pid": 2070552, "tid": 2070552, + "ts": 5333367014811.146, "dur": 81.274, + "args": { + "External id": 289613,"Record function id": 0, "Ev Idx": 5145 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5333367014966.040, "dur": 48.470, + "args": { + "External id": 289614,"Record function id": 0, "Ev Idx": 5146 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.3)", "pid": 2070552, "tid": 2070552, + "ts": 5333367015024.479, "dur": 18246.767, + "args": { + "External id": 289615,"Record function id": 0, "Ev Idx": 5147 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.3)", "pid": 2070552, "tid": 2070552, + "ts": 5333367015034.059, "dur": 890.350, + "args": { + "External id": 289616,"Record function id": 0, "Ev Idx": 5148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367015115.958, "dur": 8.635, + "args": { + "External id": 289617,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367015140.223, "dur": 64.745, + "args": { + "External id": 289618,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367015145.586, "dur": 2.234, + "args": { + "External id": 289619,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367015152.470, "dur": 0.384, + "args": { + "External id": 289620,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367015154.229, "dur": 0.231, + "args": { + "External id": 289621,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367015156.275, "dur": 0.251, + "args": { + "External id": 289622,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367015159.676, "dur": 0.215, + "args": { + "External id": 289623,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367015161.516, "dur": 0.298, + "args": { + "External id": 289624,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367015163.411, "dur": 27.518, + "args": { + "External id": 289625,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367015193.626, "dur": 0.406, + "args": { + "External id": 289626,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367015198.190, "dur": 0.512, + "args": { + "External id": 289627,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367015217.253, "dur": 46.218, + "args": { + "External id": 289628,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5333367015298.614, "dur": 120.494, + "args": { + "External id": 289629,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367015311.533, "dur": 5.203, + "args": { + "External id": 289630,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5333367015321.786, "dur": 12.630, + "args": { + "External id": 289631,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367015326.419, "dur": 7.514, + "args": { + "External id": 289632,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367015330.215, "dur": 2.292, + "args": { + "External id": 289633,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367015344.561, "dur": 29.377, + "args": { + "External id": 289634,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367015346.800, "dur": 0.565, + "args": { + "External id": 289635,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367015349.101, "dur": 2.472, + "args": { + "External id": 289636,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367015352.699, "dur": 0.395, + "args": { + "External id": 289637,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367015354.497, "dur": 1.963, + "args": { + "External id": 289638,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367015359.664, "dur": 0.381, + "args": { + "External id": 289639,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367015361.211, "dur": 0.581, + "args": { + "External id": 289640,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367015363.311, "dur": 0.380, + "args": { + "External id": 289641,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367015366.438, "dur": 0.814, + "args": { + "External id": 289642,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367015368.744, "dur": 0.586, + "args": { + "External id": 289643,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367015386.870, "dur": 24.249, + "args": { + "External id": 289644,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5333367015473.854, "dur": 352.230, + "args": { + "External id": 289645,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367015506.455, "dur": 314.309, + "args": { + "External id": 289646,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5178, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5333367015517.024, "dur": 297.641, + "args": { + "External id": 289647,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367015852.014, "dur": 2.411, + "args": { + "External id": 289648,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5180, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.3)", "pid": 2070552, "tid": 2070552, + "ts": 5333367015945.826, "dur": 17101.495, + "args": { + "External id": 289649,"Record function id": 0, "Ev Idx": 5181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367016044.510, "dur": 6.152, + "args": { + "External id": 289650,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367016054.749, "dur": 1.030, + "args": { + "External id": 289651,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367016057.730, "dur": 3.141, + "args": { + "External id": 289652,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367016062.715, "dur": 0.907, + "args": { + "External id": 289653,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367016065.192, "dur": 0.988, + "args": { + "External id": 289654,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367016067.628, "dur": 0.949, + "args": { + "External id": 289655,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367016072.142, "dur": 1.076, + "args": { + "External id": 289656,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367016074.631, "dur": 1.896, + "args": { + "External id": 289657,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367016078.087, "dur": 1.145, + "args": { + "External id": 289658,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367016080.859, "dur": 1.005, + "args": { + "External id": 289659,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367016101.810, "dur": 16898.959, + "args": { + "External id": 289660,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367016116.557, "dur": 16876.944, + "args": { + "External id": 289661,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367016134.849, "dur": 13.764, + "args": { + "External id": 289662,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333367016152.508, "dur": 16808.310, + "args": { + "External id": 289663,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367016154.960, "dur": 16805.107, + "args": { + "External id": 289664,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367016160.619, "dur": 37.231, + "args": { + "External id": 289665,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367016200.441, "dur": 16756.629, + "args": { + "External id": 289666,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367033209.143, "dur": 33.833, + "args": { + "External id": 289667,"Sequence number": 1209167, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5199 + } + }, + { + "ph": "s", "id": 65, "pid": 2070552, "tid": 2070552, "ts": 5333367033209.143, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5333367033228.501, "dur": 9.483, + "args": { + "External id": 289668,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367033232.260, "dur": 5.323, + "args": { + "External id": 289669,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5333367033312.710, "dur": 85.660, + "args": { + "External id": 289670,"Record function id": 0, "Ev Idx": 5202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5333367033400.082, "dur": 1077.418, + "args": { + "External id": 289671,"Record function id": 0, "Ev Idx": 5203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367033442.315, "dur": 1021.713, + "args": { + "External id": 289672,"Sequence number": 1209168, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5204 + } + }, + { + "ph": "s", "id": 64, "pid": 2070552, "tid": 2070552, "ts": 5333367033442.315, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367033510.642, "dur": 44.537, + "args": { + "External id": 289673,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367033569.358, "dur": 133.899, + "args": { + "External id": 289674,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367033717.703, "dur": 42.005, + "args": { + "External id": 289675,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367033767.625, "dur": 31.197, + "args": { + "External id": 289676,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367033827.426, "dur": 26.521, + "args": { + "External id": 289677,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367033871.811, "dur": 13.164, + "args": { + "External id": 289678,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5333367033904.846, "dur": 126.959, + "args": { + "External id": 289679,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367033954.154, "dur": 11.333, + "args": { + "External id": 289680,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367033958.850, "dur": 5.676, + "args": { + "External id": 289681,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367033968.184, "dur": 5.229, + "args": { + "External id": 289682,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367033974.894, "dur": 1.125, + "args": { + "External id": 289683,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367033978.799, "dur": 5.482, + "args": { + "External id": 289684,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367034042.712, "dur": 44.153, + "args": { + "External id": 289685,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5333367034116.416, "dur": 28.188, + "args": { + "External id": 289686,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367034154.641, "dur": 64.283, + "args": { + "External id": 289687,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367034230.299, "dur": 37.554, + "args": { + "External id": 289688,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367034291.322, "dur": 24.610, + "args": { + "External id": 289689,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367034321.925, "dur": 33.704, + "args": { + "External id": 289690,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367034375.565, "dur": 19.295, + "args": { + "External id": 289691,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5223 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.3)", "pid": 2070552, "tid": 2070552, + "ts": 5333367034541.912, "dur": 124.025, + "args": { + "External id": 289692,"Record function id": 0, "Ev Idx": 5224 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5333367034747.903, "dur": 49.016, + "args": { + "External id": 289693,"Record function id": 0, "Ev Idx": 5225 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.4)", "pid": 2070552, "tid": 2070552, + "ts": 5333367034806.476, "dur": 18700.617, + "args": { + "External id": 289694,"Record function id": 0, "Ev Idx": 5226 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.4)", "pid": 2070552, "tid": 2070552, + "ts": 5333367034814.978, "dur": 874.725, + "args": { + "External id": 289695,"Record function id": 0, "Ev Idx": 5227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367034896.709, "dur": 9.366, + "args": { + "External id": 289696,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367034920.620, "dur": 39.430, + "args": { + "External id": 289697,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367034926.300, "dur": 2.550, + "args": { + "External id": 289698,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367034933.731, "dur": 0.743, + "args": { + "External id": 289699,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367034936.185, "dur": 0.731, + "args": { + "External id": 289700,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367034938.726, "dur": 0.338, + "args": { + "External id": 289701,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367034942.096, "dur": 0.658, + "args": { + "External id": 289702,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367034944.353, "dur": 0.657, + "args": { + "External id": 289703,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367034946.302, "dur": 4.228, + "args": { + "External id": 289704,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367034952.167, "dur": 0.573, + "args": { + "External id": 289705,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367034953.820, "dur": 0.634, + "args": { + "External id": 289706,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367034970.603, "dur": 46.141, + "args": { + "External id": 289707,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5333367035050.279, "dur": 141.341, + "args": { + "External id": 289708,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367035062.921, "dur": 3.647, + "args": { + "External id": 289709,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5333367035071.468, "dur": 10.974, + "args": { + "External id": 289710,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367035075.876, "dur": 6.144, + "args": { + "External id": 289711,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367035079.725, "dur": 1.109, + "args": { + "External id": 289712,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367035088.649, "dur": 37.213, + "args": { + "External id": 289713,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367035091.485, "dur": 2.672, + "args": { + "External id": 289714,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367035096.081, "dur": 0.560, + "args": { + "External id": 289715,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367035097.839, "dur": 0.817, + "args": { + "External id": 289716,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367035102.600, "dur": 2.094, + "args": { + "External id": 289717,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367035106.279, "dur": 0.375, + "args": { + "External id": 289718,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367035112.313, "dur": 0.509, + "args": { + "External id": 289719,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367035115.582, "dur": 0.610, + "args": { + "External id": 289720,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367035117.298, "dur": 0.648, + "args": { + "External id": 289721,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367035119.144, "dur": 2.496, + "args": { + "External id": 289722,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367035136.727, "dur": 23.857, + "args": { + "External id": 289723,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5333367035249.567, "dur": 306.765, + "args": { + "External id": 289724,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367035284.043, "dur": 268.053, + "args": { + "External id": 289725,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5257, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5333367035296.473, "dur": 248.807, + "args": { + "External id": 289726,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367035578.023, "dur": 1.779, + "args": { + "External id": 289727,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5259, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.4)", "pid": 2070552, "tid": 2070552, + "ts": 5333367035711.788, "dur": 17598.819, + "args": { + "External id": 289728,"Record function id": 0, "Ev Idx": 5260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367035813.379, "dur": 6.289, + "args": { + "External id": 289729,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367035823.771, "dur": 1.108, + "args": { + "External id": 289730,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367035826.593, "dur": 3.064, + "args": { + "External id": 289731,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367035831.454, "dur": 1.083, + "args": { + "External id": 289732,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367035834.003, "dur": 0.868, + "args": { + "External id": 289733,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367035836.584, "dur": 0.886, + "args": { + "External id": 289734,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367035841.083, "dur": 0.887, + "args": { + "External id": 289735,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367035843.794, "dur": 1.478, + "args": { + "External id": 289736,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367035846.726, "dur": 1.097, + "args": { + "External id": 289737,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367035849.032, "dur": 0.941, + "args": { + "External id": 289738,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367035869.188, "dur": 17396.457, + "args": { + "External id": 289739,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367035884.481, "dur": 17373.419, + "args": { + "External id": 289740,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367035903.608, "dur": 14.445, + "args": { + "External id": 289741,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333367035921.626, "dur": 17302.681, + "args": { + "External id": 289742,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367035924.095, "dur": 17299.434, + "args": { + "External id": 289743,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367035930.064, "dur": 6.358, + "args": { + "External id": 289744,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367035937.999, "dur": 17282.248, + "args": { + "External id": 289745,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367053452.853, "dur": 30.104, + "args": { + "External id": 289746,"Sequence number": 1209169, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5278 + } + }, + { + "ph": "s", "id": 63, "pid": 2070552, "tid": 2070552, "ts": 5333367053452.853, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5333367053469.559, "dur": 8.824, + "args": { + "External id": 289747,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367053473.223, "dur": 4.873, + "args": { + "External id": 289748,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5333367053546.202, "dur": 116.550, + "args": { + "External id": 289749,"Record function id": 0, "Ev Idx": 5281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5333367053665.819, "dur": 1100.626, + "args": { + "External id": 289750,"Record function id": 0, "Ev Idx": 5282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367053710.009, "dur": 1043.025, + "args": { + "External id": 289751,"Sequence number": 1209170, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5283 + } + }, + { + "ph": "s", "id": 62, "pid": 2070552, "tid": 2070552, "ts": 5333367053710.009, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367053778.723, "dur": 45.509, + "args": { + "External id": 289752,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367053837.874, "dur": 107.650, + "args": { + "External id": 289753,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367053955.139, "dur": 51.328, + "args": { + "External id": 289754,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367054013.870, "dur": 31.035, + "args": { + "External id": 289755,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367054071.109, "dur": 25.730, + "args": { + "External id": 289756,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367054114.166, "dur": 14.721, + "args": { + "External id": 289757,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5333367054149.228, "dur": 147.826, + "args": { + "External id": 289758,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367054215.401, "dur": 12.296, + "args": { + "External id": 289759,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367054220.395, "dur": 6.281, + "args": { + "External id": 289760,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367054230.271, "dur": 5.576, + "args": { + "External id": 289761,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367054237.084, "dur": 1.202, + "args": { + "External id": 289762,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367054240.570, "dur": 4.840, + "args": { + "External id": 289763,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367054308.649, "dur": 51.401, + "args": { + "External id": 289764,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5333367054391.297, "dur": 27.846, + "args": { + "External id": 289765,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367054428.639, "dur": 41.149, + "args": { + "External id": 289766,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367054478.505, "dur": 34.419, + "args": { + "External id": 289767,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367054533.198, "dur": 24.808, + "args": { + "External id": 289768,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367054563.887, "dur": 34.157, + "args": { + "External id": 289769,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367054618.959, "dur": 57.481, + "args": { + "External id": 289770,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5302 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.4)", "pid": 2070552, "tid": 2070552, + "ts": 5333367054830.561, "dur": 78.908, + "args": { + "External id": 289771,"Record function id": 0, "Ev Idx": 5303 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5333367054981.154, "dur": 49.137, + "args": { + "External id": 289772,"Record function id": 0, "Ev Idx": 5304 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.5)", "pid": 2070552, "tid": 2070552, + "ts": 5333367055039.167, "dur": 18221.488, + "args": { + "External id": 289773,"Record function id": 0, "Ev Idx": 5305 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.5)", "pid": 2070552, "tid": 2070552, + "ts": 5333367055047.517, "dur": 878.547, + "args": { + "External id": 289774,"Record function id": 0, "Ev Idx": 5306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367055130.728, "dur": 8.149, + "args": { + "External id": 289775,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367055152.294, "dur": 54.008, + "args": { + "External id": 289776,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367055157.457, "dur": 2.346, + "args": { + "External id": 289777,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367055163.822, "dur": 0.675, + "args": { + "External id": 289778,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367055165.717, "dur": 17.184, + "args": { + "External id": 289779,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367055185.217, "dur": 0.659, + "args": { + "External id": 289780,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367055188.556, "dur": 0.475, + "args": { + "External id": 289781,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367055190.610, "dur": 0.389, + "args": { + "External id": 289782,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367055192.054, "dur": 4.661, + "args": { + "External id": 289783,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367055197.889, "dur": 0.588, + "args": { + "External id": 289784,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367055199.446, "dur": 0.490, + "args": { + "External id": 289785,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367055220.610, "dur": 43.258, + "args": { + "External id": 289786,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5333367055300.729, "dur": 139.565, + "args": { + "External id": 289787,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367055313.514, "dur": 6.207, + "args": { + "External id": 289788,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5333367055324.705, "dur": 10.643, + "args": { + "External id": 289789,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367055329.182, "dur": 5.640, + "args": { + "External id": 289790,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367055332.787, "dur": 0.661, + "args": { + "External id": 289791,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367055341.847, "dur": 46.630, + "args": { + "External id": 289792,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367055365.415, "dur": 0.431, + "args": { + "External id": 289793,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367055368.906, "dur": 0.271, + "args": { + "External id": 289794,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367055370.165, "dur": 0.264, + "args": { + "External id": 289795,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367055371.762, "dur": 3.763, + "args": { + "External id": 289796,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367055376.721, "dur": 0.408, + "args": { + "External id": 289797,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367055377.842, "dur": 0.344, + "args": { + "External id": 289798,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367055380.985, "dur": 0.268, + "args": { + "External id": 289799,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367055382.437, "dur": 0.240, + "args": { + "External id": 289800,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367055383.623, "dur": 0.302, + "args": { + "External id": 289801,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367055407.989, "dur": 24.588, + "args": { + "External id": 289802,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5333367055496.097, "dur": 336.455, + "args": { + "External id": 289803,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367055527.408, "dur": 299.814, + "args": { + "External id": 289804,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5336, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5333367055539.807, "dur": 281.260, + "args": { + "External id": 289805,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367055857.000, "dur": 2.484, + "args": { + "External id": 289806,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5338, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.5)", "pid": 2070552, "tid": 2070552, + "ts": 5333367055946.547, "dur": 17096.217, + "args": { + "External id": 289807,"Record function id": 0, "Ev Idx": 5339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367056047.509, "dur": 5.938, + "args": { + "External id": 289808,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367056057.326, "dur": 1.056, + "args": { + "External id": 289809,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367056060.458, "dur": 3.050, + "args": { + "External id": 289810,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367056065.367, "dur": 0.817, + "args": { + "External id": 289811,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367056067.695, "dur": 1.145, + "args": { + "External id": 289812,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367056070.327, "dur": 0.985, + "args": { + "External id": 289813,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367056074.946, "dur": 0.859, + "args": { + "External id": 289814,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367056077.060, "dur": 1.877, + "args": { + "External id": 289815,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367056080.216, "dur": 0.959, + "args": { + "External id": 289816,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367056082.425, "dur": 0.692, + "args": { + "External id": 289817,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367056102.614, "dur": 16894.617, + "args": { + "External id": 289818,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367056117.179, "dur": 16872.205, + "args": { + "External id": 289819,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367056135.895, "dur": 15.425, + "args": { + "External id": 289820,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333367056155.475, "dur": 16800.553, + "args": { + "External id": 289821,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367056158.159, "dur": 16797.080, + "args": { + "External id": 289822,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367056163.486, "dur": 24.047, + "args": { + "External id": 289823,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367056190.091, "dur": 16762.049, + "args": { + "External id": 289824,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367073202.430, "dur": 29.690, + "args": { + "External id": 289825,"Sequence number": 1209171, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5357 + } + }, + { + "ph": "s", "id": 61, "pid": 2070552, "tid": 2070552, "ts": 5333367073202.430, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5333367073218.713, "dur": 8.627, + "args": { + "External id": 289826,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367073222.044, "dur": 4.878, + "args": { + "External id": 289827,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5333367073298.944, "dur": 83.352, + "args": { + "External id": 289828,"Record function id": 0, "Ev Idx": 5360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5333367073384.178, "dur": 1067.351, + "args": { + "External id": 289829,"Record function id": 0, "Ev Idx": 5361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367073425.085, "dur": 1013.532, + "args": { + "External id": 289830,"Sequence number": 1209172, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5362 + } + }, + { + "ph": "s", "id": 60, "pid": 2070552, "tid": 2070552, "ts": 5333367073425.085, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367073491.773, "dur": 46.306, + "args": { + "External id": 289831,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367073551.617, "dur": 131.726, + "args": { + "External id": 289832,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367073697.687, "dur": 42.513, + "args": { + "External id": 289833,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367073748.716, "dur": 31.192, + "args": { + "External id": 289834,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367073807.677, "dur": 25.949, + "args": { + "External id": 289835,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367073851.615, "dur": 13.990, + "args": { + "External id": 289836,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5333367073886.214, "dur": 129.320, + "args": { + "External id": 289837,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367073937.623, "dur": 10.739, + "args": { + "External id": 289838,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367073942.509, "dur": 5.097, + "args": { + "External id": 289839,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367073950.770, "dur": 5.932, + "args": { + "External id": 289840,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367073957.987, "dur": 1.095, + "args": { + "External id": 289841,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367073961.255, "dur": 5.246, + "args": { + "External id": 289842,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367074026.355, "dur": 43.324, + "args": { + "External id": 289843,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5333367074098.368, "dur": 27.363, + "args": { + "External id": 289844,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367074135.813, "dur": 56.034, + "args": { + "External id": 289845,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367074203.596, "dur": 37.587, + "args": { + "External id": 289846,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367074264.665, "dur": 25.455, + "args": { + "External id": 289847,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367074295.357, "dur": 32.951, + "args": { + "External id": 289848,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367074347.095, "dur": 17.765, + "args": { + "External id": 289849,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5381 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.5)", "pid": 2070552, "tid": 2070552, + "ts": 5333367074515.612, "dur": 78.736, + "args": { + "External id": 289850,"Record function id": 0, "Ev Idx": 5382 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5333367074712.143, "dur": 48.604, + "args": { + "External id": 289851,"Record function id": 0, "Ev Idx": 5383 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.6)", "pid": 2070552, "tid": 2070552, + "ts": 5333367074770.646, "dur": 18142.635, + "args": { + "External id": 289852,"Record function id": 0, "Ev Idx": 5384 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.6)", "pid": 2070552, "tid": 2070552, + "ts": 5333367074778.953, "dur": 894.128, + "args": { + "External id": 289853,"Record function id": 0, "Ev Idx": 5385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367074861.334, "dur": 8.910, + "args": { + "External id": 289854,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367074884.191, "dur": 34.659, + "args": { + "External id": 289855,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367074889.954, "dur": 2.343, + "args": { + "External id": 289856,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367074896.401, "dur": 0.551, + "args": { + "External id": 289857,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367074897.974, "dur": 0.686, + "args": { + "External id": 289858,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367074899.699, "dur": 0.436, + "args": { + "External id": 289859,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367074903.229, "dur": 0.498, + "args": { + "External id": 289860,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367074904.577, "dur": 0.476, + "args": { + "External id": 289861,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367074905.986, "dur": 3.032, + "args": { + "External id": 289862,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367074910.201, "dur": 0.438, + "args": { + "External id": 289863,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367074912.031, "dur": 0.610, + "args": { + "External id": 289864,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367074930.040, "dur": 42.694, + "args": { + "External id": 289865,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5333367075004.504, "dur": 108.854, + "args": { + "External id": 289866,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367075016.442, "dur": 3.856, + "args": { + "External id": 289867,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5333367075025.378, "dur": 10.324, + "args": { + "External id": 289868,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367075029.898, "dur": 5.389, + "args": { + "External id": 289869,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367075033.349, "dur": 0.644, + "args": { + "External id": 289870,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367075042.197, "dur": 29.979, + "args": { + "External id": 289871,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367075044.125, "dur": 2.312, + "args": { + "External id": 289872,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367075047.500, "dur": 0.372, + "args": { + "External id": 289873,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367075048.748, "dur": 0.602, + "args": { + "External id": 289874,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367075052.851, "dur": 2.276, + "args": { + "External id": 289875,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367075056.316, "dur": 0.617, + "args": { + "External id": 289876,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367075058.245, "dur": 0.544, + "args": { + "External id": 289877,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367075062.161, "dur": 0.533, + "args": { + "External id": 289878,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367075064.146, "dur": 0.318, + "args": { + "External id": 289879,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367075065.417, "dur": 2.216, + "args": { + "External id": 289880,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367075083.181, "dur": 22.727, + "args": { + "External id": 289881,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5333367075165.639, "dur": 373.625, + "args": { + "External id": 289882,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367075215.948, "dur": 318.192, + "args": { + "External id": 289883,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5415, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5333367075226.050, "dur": 301.770, + "args": { + "External id": 289884,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367075564.160, "dur": 2.429, + "args": { + "External id": 289885,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5417, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.6)", "pid": 2070552, "tid": 2070552, + "ts": 5333367075697.826, "dur": 17008.775, + "args": { + "External id": 289886,"Record function id": 0, "Ev Idx": 5418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367075803.660, "dur": 6.661, + "args": { + "External id": 289887,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367075814.585, "dur": 1.082, + "args": { + "External id": 289888,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367075817.444, "dur": 2.925, + "args": { + "External id": 289889,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367075822.253, "dur": 0.844, + "args": { + "External id": 289890,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367075824.532, "dur": 0.882, + "args": { + "External id": 289891,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367075826.728, "dur": 1.283, + "args": { + "External id": 289892,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367075832.165, "dur": 0.911, + "args": { + "External id": 289893,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367075834.579, "dur": 1.837, + "args": { + "External id": 289894,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367075837.705, "dur": 0.719, + "args": { + "External id": 289895,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367075839.713, "dur": 0.982, + "args": { + "External id": 289896,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367075860.313, "dur": 16798.466, + "args": { + "External id": 289897,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367075875.274, "dur": 16742.080, + "args": { + "External id": 289898,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367075895.766, "dur": 15.543, + "args": { + "External id": 289899,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333367075915.093, "dur": 16669.112, + "args": { + "External id": 289900,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367075917.457, "dur": 16666.010, + "args": { + "External id": 289901,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367075922.873, "dur": 5.579, + "args": { + "External id": 289902,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367075930.161, "dur": 16649.964, + "args": { + "External id": 289903,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367092856.016, "dur": 30.450, + "args": { + "External id": 289904,"Sequence number": 1209173, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5436 + } + }, + { + "ph": "s", "id": 59, "pid": 2070552, "tid": 2070552, "ts": 5333367092856.016, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5333367092872.927, "dur": 8.773, + "args": { + "External id": 289905,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367092876.490, "dur": 4.918, + "args": { + "External id": 289906,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5333367092952.562, "dur": 82.284, + "args": { + "External id": 289907,"Record function id": 0, "Ev Idx": 5439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5333367093036.410, "dur": 1074.879, + "args": { + "External id": 289908,"Record function id": 0, "Ev Idx": 5440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367093074.985, "dur": 1023.127, + "args": { + "External id": 289909,"Sequence number": 1209174, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5441 + } + }, + { + "ph": "s", "id": 58, "pid": 2070552, "tid": 2070552, "ts": 5333367093074.985, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367093140.266, "dur": 59.475, + "args": { + "External id": 289910,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367093216.583, "dur": 104.944, + "args": { + "External id": 289911,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367093332.146, "dur": 39.827, + "args": { + "External id": 289912,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367093379.764, "dur": 29.923, + "args": { + "External id": 289913,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367093435.058, "dur": 25.614, + "args": { + "External id": 289914,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367093477.644, "dur": 14.382, + "args": { + "External id": 289915,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5333367093512.261, "dur": 163.109, + "args": { + "External id": 289916,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367093560.910, "dur": 10.267, + "args": { + "External id": 289917,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367093565.485, "dur": 4.972, + "args": { + "External id": 289918,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367093573.354, "dur": 5.313, + "args": { + "External id": 289919,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367093579.937, "dur": 1.150, + "args": { + "External id": 289920,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367093583.485, "dur": 4.026, + "args": { + "External id": 289921,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367093688.325, "dur": 55.896, + "args": { + "External id": 289922,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5333367093776.776, "dur": 28.311, + "args": { + "External id": 289923,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367093813.990, "dur": 40.617, + "args": { + "External id": 289924,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367093869.663, "dur": 35.513, + "args": { + "External id": 289925,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367093929.829, "dur": 25.686, + "args": { + "External id": 289926,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367093963.289, "dur": 33.299, + "args": { + "External id": 289927,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367094014.428, "dur": 17.417, + "args": { + "External id": 289928,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5460 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.6)", "pid": 2070552, "tid": 2070552, + "ts": 5333367094190.129, "dur": 77.359, + "args": { + "External id": 289929,"Record function id": 0, "Ev Idx": 5461 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5333367094342.406, "dur": 47.331, + "args": { + "External id": 289930,"Record function id": 0, "Ev Idx": 5462 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.7)", "pid": 2070552, "tid": 2070552, + "ts": 5333367094398.520, "dur": 18349.934, + "args": { + "External id": 289931,"Record function id": 0, "Ev Idx": 5463 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.7)", "pid": 2070552, "tid": 2070552, + "ts": 5333367094406.297, "dur": 837.967, + "args": { + "External id": 289932,"Record function id": 0, "Ev Idx": 5464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367094488.626, "dur": 8.903, + "args": { + "External id": 289933,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367094511.512, "dur": 34.005, + "args": { + "External id": 289934,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367094516.548, "dur": 2.347, + "args": { + "External id": 289935,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367094523.065, "dur": 0.443, + "args": { + "External id": 289936,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367094524.428, "dur": 0.605, + "args": { + "External id": 289937,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367094526.116, "dur": 0.412, + "args": { + "External id": 289938,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367094529.717, "dur": 0.689, + "args": { + "External id": 289939,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367094531.358, "dur": 0.380, + "args": { + "External id": 289940,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367094532.591, "dur": 4.146, + "args": { + "External id": 289941,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367094537.826, "dur": 0.586, + "args": { + "External id": 289942,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367094539.274, "dur": 0.265, + "args": { + "External id": 289943,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367094555.734, "dur": 43.826, + "args": { + "External id": 289944,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5333367094672.433, "dur": 119.557, + "args": { + "External id": 289945,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367094686.218, "dur": 5.369, + "args": { + "External id": 289946,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5333367094696.906, "dur": 12.930, + "args": { + "External id": 289947,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367094701.506, "dur": 7.862, + "args": { + "External id": 289948,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367094706.923, "dur": 0.782, + "args": { + "External id": 289949,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367094716.536, "dur": 26.253, + "args": { + "External id": 289950,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367094718.336, "dur": 2.420, + "args": { + "External id": 289951,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367094722.063, "dur": 0.603, + "args": { + "External id": 289952,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367094723.382, "dur": 0.524, + "args": { + "External id": 289953,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367094726.753, "dur": 2.473, + "args": { + "External id": 289954,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367094729.894, "dur": 0.284, + "args": { + "External id": 289955,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367094731.131, "dur": 0.616, + "args": { + "External id": 289956,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367094734.095, "dur": 0.366, + "args": { + "External id": 289957,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367094735.608, "dur": 0.298, + "args": { + "External id": 289958,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367094736.613, "dur": 2.554, + "args": { + "External id": 289959,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367094756.015, "dur": 27.605, + "args": { + "External id": 289960,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5333367094847.474, "dur": 288.287, + "args": { + "External id": 289961,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367094880.175, "dur": 251.187, + "args": { + "External id": 289962,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5494, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5333367094889.904, "dur": 236.404, + "args": { + "External id": 289963,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367095156.800, "dur": 2.318, + "args": { + "External id": 289964,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5496, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.7)", "pid": 2070552, "tid": 2070552, + "ts": 5333367095265.109, "dur": 17248.695, + "args": { + "External id": 289965,"Record function id": 0, "Ev Idx": 5497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367095365.165, "dur": 5.955, + "args": { + "External id": 289966,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367095374.579, "dur": 1.073, + "args": { + "External id": 289967,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367095377.396, "dur": 2.923, + "args": { + "External id": 289968,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367095382.076, "dur": 0.969, + "args": { + "External id": 289969,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367095385.097, "dur": 1.014, + "args": { + "External id": 289970,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367095387.738, "dur": 0.760, + "args": { + "External id": 289971,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367095392.000, "dur": 0.893, + "args": { + "External id": 289972,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367095394.292, "dur": 2.139, + "args": { + "External id": 289973,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367095397.751, "dur": 0.778, + "args": { + "External id": 289974,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367095399.972, "dur": 0.773, + "args": { + "External id": 289975,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367095420.337, "dur": 17048.162, + "args": { + "External id": 289976,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367095434.680, "dur": 17025.808, + "args": { + "External id": 289977,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367095452.920, "dur": 15.559, + "args": { + "External id": 289978,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333367095472.319, "dur": 16953.728, + "args": { + "External id": 289979,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367095474.709, "dur": 16950.665, + "args": { + "External id": 289980,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367095481.113, "dur": 6.865, + "args": { + "External id": 289981,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367095489.909, "dur": 16932.003, + "args": { + "External id": 289982,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367112683.915, "dur": 38.673, + "args": { + "External id": 289983,"Sequence number": 1209175, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5515 + } + }, + { + "ph": "s", "id": 57, "pid": 2070552, "tid": 2070552, "ts": 5333367112683.915, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5333367112707.914, "dur": 9.887, + "args": { + "External id": 289984,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367112711.670, "dur": 5.683, + "args": { + "External id": 289985,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5333367112787.146, "dur": 83.717, + "args": { + "External id": 289986,"Record function id": 0, "Ev Idx": 5518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5333367112872.349, "dur": 1081.599, + "args": { + "External id": 289987,"Record function id": 0, "Ev Idx": 5519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367112913.219, "dur": 1027.002, + "args": { + "External id": 289988,"Sequence number": 1209176, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5520 + } + }, + { + "ph": "s", "id": 56, "pid": 2070552, "tid": 2070552, "ts": 5333367112913.219, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367112981.665, "dur": 45.001, + "args": { + "External id": 289989,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367113040.209, "dur": 107.994, + "args": { + "External id": 289990,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367113158.152, "dur": 57.790, + "args": { + "External id": 289991,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367113227.198, "dur": 33.069, + "args": { + "External id": 289992,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367113286.507, "dur": 27.845, + "args": { + "External id": 289993,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367113333.604, "dur": 14.085, + "args": { + "External id": 289994,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5333367113367.293, "dur": 125.286, + "args": { + "External id": 289995,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367113415.048, "dur": 11.256, + "args": { + "External id": 289996,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367113420.428, "dur": 5.061, + "args": { + "External id": 289997,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367113429.101, "dur": 5.318, + "args": { + "External id": 289998,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367113436.092, "dur": 1.148, + "args": { + "External id": 289999,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367113439.548, "dur": 5.065, + "args": { + "External id": 290000,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367113503.047, "dur": 45.267, + "args": { + "External id": 290001,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5333367113574.999, "dur": 27.620, + "args": { + "External id": 290002,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367113611.400, "dur": 81.109, + "args": { + "External id": 290003,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367113704.386, "dur": 36.451, + "args": { + "External id": 290004,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367113764.201, "dur": 27.813, + "args": { + "External id": 290005,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367113797.772, "dur": 34.170, + "args": { + "External id": 290006,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367113851.684, "dur": 17.739, + "args": { + "External id": 290007,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5539 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.7)", "pid": 2070552, "tid": 2070552, + "ts": 5333367114019.507, "dur": 79.371, + "args": { + "External id": 290008,"Record function id": 0, "Ev Idx": 5540 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5333367114192.321, "dur": 50.823, + "args": { + "External id": 290009,"Record function id": 0, "Ev Idx": 5541 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.8)", "pid": 2070552, "tid": 2070552, + "ts": 5333367114253.331, "dur": 18328.788, + "args": { + "External id": 290010,"Record function id": 0, "Ev Idx": 5542 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.8)", "pid": 2070552, "tid": 2070552, + "ts": 5333367114261.307, "dur": 819.565, + "args": { + "External id": 290011,"Record function id": 0, "Ev Idx": 5543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367114345.766, "dur": 8.537, + "args": { + "External id": 290012,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367114370.735, "dur": 37.469, + "args": { + "External id": 290013,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367114376.810, "dur": 2.128, + "args": { + "External id": 290014,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367114383.400, "dur": 0.238, + "args": { + "External id": 290015,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367114385.353, "dur": 0.305, + "args": { + "External id": 290016,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367114387.033, "dur": 0.526, + "args": { + "External id": 290017,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367114390.818, "dur": 0.429, + "args": { + "External id": 290018,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367114392.681, "dur": 0.257, + "args": { + "External id": 290019,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367114394.268, "dur": 3.619, + "args": { + "External id": 290020,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367114399.258, "dur": 0.414, + "args": { + "External id": 290021,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367114401.054, "dur": 0.457, + "args": { + "External id": 290022,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367114419.238, "dur": 41.061, + "args": { + "External id": 290023,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5333367114491.419, "dur": 109.513, + "args": { + "External id": 290024,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367114503.756, "dur": 3.478, + "args": { + "External id": 290025,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5333367114512.439, "dur": 10.339, + "args": { + "External id": 290026,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367114516.734, "dur": 5.623, + "args": { + "External id": 290027,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367114520.590, "dur": 0.594, + "args": { + "External id": 290028,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367114529.868, "dur": 30.088, + "args": { + "External id": 290029,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367114532.183, "dur": 2.567, + "args": { + "External id": 290030,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367114536.402, "dur": 0.382, + "args": { + "External id": 290031,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367114538.129, "dur": 0.383, + "args": { + "External id": 290032,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367114541.884, "dur": 1.231, + "args": { + "External id": 290033,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367114544.783, "dur": 0.401, + "args": { + "External id": 290034,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367114546.603, "dur": 0.176, + "args": { + "External id": 290035,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367114549.965, "dur": 0.501, + "args": { + "External id": 290036,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367114551.822, "dur": 0.159, + "args": { + "External id": 290037,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367114553.230, "dur": 2.177, + "args": { + "External id": 290038,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367114570.009, "dur": 22.627, + "args": { + "External id": 290039,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5333367114696.137, "dur": 296.685, + "args": { + "External id": 290040,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367114729.788, "dur": 258.373, + "args": { + "External id": 290041,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5573, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5333367114740.362, "dur": 242.409, + "args": { + "External id": 290042,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367115013.864, "dur": 2.202, + "args": { + "External id": 290043,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5575, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.8)", "pid": 2070552, "tid": 2070552, + "ts": 5333367115099.816, "dur": 17278.926, + "args": { + "External id": 290044,"Record function id": 0, "Ev Idx": 5576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367115216.275, "dur": 6.615, + "args": { + "External id": 290045,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367115226.884, "dur": 1.262, + "args": { + "External id": 290046,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367115230.029, "dur": 2.202, + "args": { + "External id": 290047,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367115233.872, "dur": 0.830, + "args": { + "External id": 290048,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367115235.924, "dur": 0.917, + "args": { + "External id": 290049,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367115237.952, "dur": 0.945, + "args": { + "External id": 290050,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367115242.478, "dur": 0.699, + "args": { + "External id": 290051,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367115244.651, "dur": 1.887, + "args": { + "External id": 290052,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367115248.257, "dur": 0.587, + "args": { + "External id": 290053,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367115250.364, "dur": 0.682, + "args": { + "External id": 290054,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367115273.081, "dur": 17058.826, + "args": { + "External id": 290055,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367115288.287, "dur": 17035.658, + "args": { + "External id": 290056,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367115306.380, "dur": 14.206, + "args": { + "External id": 290057,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333367115324.294, "dur": 16965.262, + "args": { + "External id": 290058,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367115326.746, "dur": 16962.052, + "args": { + "External id": 290059,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367115332.933, "dur": 5.391, + "args": { + "External id": 290060,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367115339.857, "dur": 16945.714, + "args": { + "External id": 290061,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367132524.466, "dur": 32.916, + "args": { + "External id": 290062,"Sequence number": 1209177, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5594 + } + }, + { + "ph": "s", "id": 55, "pid": 2070552, "tid": 2070552, "ts": 5333367132524.466, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5333367132543.633, "dur": 9.059, + "args": { + "External id": 290063,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367132547.659, "dur": 4.819, + "args": { + "External id": 290064,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5333367132651.268, "dur": 84.344, + "args": { + "External id": 290065,"Record function id": 0, "Ev Idx": 5597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5333367132737.925, "dur": 1080.028, + "args": { + "External id": 290066,"Record function id": 0, "Ev Idx": 5598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367132779.787, "dur": 1024.896, + "args": { + "External id": 290067,"Sequence number": 1209178, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5599 + } + }, + { + "ph": "s", "id": 54, "pid": 2070552, "tid": 2070552, "ts": 5333367132779.787, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367132848.483, "dur": 46.130, + "args": { + "External id": 290068,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367132907.435, "dur": 102.727, + "args": { + "External id": 290069,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367133021.162, "dur": 38.248, + "args": { + "External id": 290070,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367133067.048, "dur": 31.586, + "args": { + "External id": 290071,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367133122.512, "dur": 23.213, + "args": { + "External id": 290072,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367133164.402, "dur": 33.085, + "args": { + "External id": 290073,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5333367133219.989, "dur": 132.623, + "args": { + "External id": 290074,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367133273.099, "dur": 12.518, + "args": { + "External id": 290075,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367133278.499, "dur": 6.254, + "args": { + "External id": 290076,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367133288.424, "dur": 5.485, + "args": { + "External id": 290077,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367133295.481, "dur": 0.962, + "args": { + "External id": 290078,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367133298.788, "dur": 5.134, + "args": { + "External id": 290079,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367133363.875, "dur": 49.967, + "args": { + "External id": 290080,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5333367133446.283, "dur": 28.391, + "args": { + "External id": 290081,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367133483.884, "dur": 40.224, + "args": { + "External id": 290082,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367133533.568, "dur": 35.155, + "args": { + "External id": 290083,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367133588.714, "dur": 25.336, + "args": { + "External id": 290084,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367133653.069, "dur": 40.056, + "args": { + "External id": 290085,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367133719.167, "dur": 18.195, + "args": { + "External id": 290086,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5618 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.8)", "pid": 2070552, "tid": 2070552, + "ts": 5333367133881.036, "dur": 73.895, + "args": { + "External id": 290087,"Record function id": 0, "Ev Idx": 5619 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5333367134026.991, "dur": 44.288, + "args": { + "External id": 290088,"Record function id": 0, "Ev Idx": 5620 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.9)", "pid": 2070552, "tid": 2070552, + "ts": 5333367134080.555, "dur": 18367.472, + "args": { + "External id": 290089,"Record function id": 0, "Ev Idx": 5621 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.9)", "pid": 2070552, "tid": 2070552, + "ts": 5333367134088.562, "dur": 851.383, + "args": { + "External id": 290090,"Record function id": 0, "Ev Idx": 5622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367134185.128, "dur": 9.480, + "args": { + "External id": 290091,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367134210.943, "dur": 38.594, + "args": { + "External id": 290092,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367134217.045, "dur": 2.117, + "args": { + "External id": 290093,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367134223.775, "dur": 0.664, + "args": { + "External id": 290094,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367134225.824, "dur": 0.376, + "args": { + "External id": 290095,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367134227.687, "dur": 0.525, + "args": { + "External id": 290096,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367134231.013, "dur": 0.445, + "args": { + "External id": 290097,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367134232.711, "dur": 0.553, + "args": { + "External id": 290098,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367134234.906, "dur": 4.177, + "args": { + "External id": 290099,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367134240.794, "dur": 0.779, + "args": { + "External id": 290100,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367134242.771, "dur": 0.422, + "args": { + "External id": 290101,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367134261.577, "dur": 43.306, + "args": { + "External id": 290102,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5333367134338.120, "dur": 121.261, + "args": { + "External id": 290103,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367134351.727, "dur": 3.906, + "args": { + "External id": 290104,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5333367134360.652, "dur": 10.499, + "args": { + "External id": 290105,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367134365.305, "dur": 5.417, + "args": { + "External id": 290106,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367134368.990, "dur": 0.569, + "args": { + "External id": 290107,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367134377.793, "dur": 35.563, + "args": { + "External id": 290108,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367134380.666, "dur": 2.228, + "args": { + "External id": 290109,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367134384.632, "dur": 0.456, + "args": { + "External id": 290110,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367134386.869, "dur": 0.516, + "args": { + "External id": 290111,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367134391.119, "dur": 2.125, + "args": { + "External id": 290112,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367134397.679, "dur": 0.378, + "args": { + "External id": 290113,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367134399.364, "dur": 0.477, + "args": { + "External id": 290114,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367134402.873, "dur": 0.396, + "args": { + "External id": 290115,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367134404.772, "dur": 0.590, + "args": { + "External id": 290116,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367134406.649, "dur": 1.848, + "args": { + "External id": 290117,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367134427.412, "dur": 24.147, + "args": { + "External id": 290118,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5333367134512.096, "dur": 331.651, + "args": { + "External id": 290119,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367134544.435, "dur": 294.353, + "args": { + "External id": 290120,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5652, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5333367134554.050, "dur": 278.455, + "args": { + "External id": 290121,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367134867.914, "dur": 2.745, + "args": { + "External id": 290122,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5654, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.9)", "pid": 2070552, "tid": 2070552, + "ts": 5333367134961.192, "dur": 17286.194, + "args": { + "External id": 290123,"Record function id": 0, "Ev Idx": 5655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367135060.922, "dur": 5.931, + "args": { + "External id": 290124,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367135070.266, "dur": 1.020, + "args": { + "External id": 290125,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367135073.308, "dur": 3.210, + "args": { + "External id": 290126,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367135078.242, "dur": 1.081, + "args": { + "External id": 290127,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367135080.680, "dur": 0.959, + "args": { + "External id": 290128,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367135082.912, "dur": 0.963, + "args": { + "External id": 290129,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367135087.303, "dur": 0.662, + "args": { + "External id": 290130,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367135089.331, "dur": 2.410, + "args": { + "External id": 290131,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367135093.432, "dur": 0.987, + "args": { + "External id": 290132,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367135096.043, "dur": 0.737, + "args": { + "External id": 290133,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367135117.264, "dur": 17085.986, + "args": { + "External id": 290134,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367135133.016, "dur": 17062.042, + "args": { + "External id": 290135,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367135152.017, "dur": 32.367, + "args": { + "External id": 290136,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333367135189.190, "dur": 16959.892, + "args": { + "External id": 290137,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367135191.769, "dur": 16956.626, + "args": { + "External id": 290138,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367135198.938, "dur": 7.060, + "args": { + "External id": 290139,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367135207.785, "dur": 16937.404, + "args": { + "External id": 290140,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367152389.367, "dur": 32.844, + "args": { + "External id": 290141,"Sequence number": 1209179, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5673 + } + }, + { + "ph": "s", "id": 53, "pid": 2070552, "tid": 2070552, "ts": 5333367152389.367, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5333367152408.769, "dur": 8.533, + "args": { + "External id": 290142,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367152412.222, "dur": 4.876, + "args": { + "External id": 290143,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5333367152486.868, "dur": 80.222, + "args": { + "External id": 290144,"Record function id": 0, "Ev Idx": 5676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5333367152568.913, "dur": 1124.581, + "args": { + "External id": 290145,"Record function id": 0, "Ev Idx": 5677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367152611.047, "dur": 1067.317, + "args": { + "External id": 290146,"Sequence number": 1209180, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5678 + } + }, + { + "ph": "s", "id": 52, "pid": 2070552, "tid": 2070552, "ts": 5333367152611.047, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367152722.756, "dur": 45.493, + "args": { + "External id": 290147,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367152782.813, "dur": 107.499, + "args": { + "External id": 290148,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367152900.997, "dur": 37.323, + "args": { + "External id": 290149,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367152946.530, "dur": 30.763, + "args": { + "External id": 290150,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367153003.802, "dur": 25.853, + "args": { + "External id": 290151,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367153046.742, "dur": 14.346, + "args": { + "External id": 290152,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5333367153087.503, "dur": 145.964, + "args": { + "External id": 290153,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367153136.196, "dur": 11.098, + "args": { + "External id": 290154,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367153141.171, "dur": 5.306, + "args": { + "External id": 290155,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367153150.066, "dur": 6.410, + "args": { + "External id": 290156,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367153157.749, "dur": 1.122, + "args": { + "External id": 290157,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367153161.289, "dur": 4.567, + "args": { + "External id": 290158,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367153245.567, "dur": 51.021, + "args": { + "External id": 290159,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5333367153330.258, "dur": 29.545, + "args": { + "External id": 290160,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367153368.994, "dur": 41.249, + "args": { + "External id": 290161,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367153418.253, "dur": 34.847, + "args": { + "External id": 290162,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367153473.575, "dur": 23.865, + "args": { + "External id": 290163,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367153503.729, "dur": 33.871, + "args": { + "External id": 290164,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367153558.570, "dur": 18.039, + "args": { + "External id": 290165,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5697 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.9)", "pid": 2070552, "tid": 2070552, + "ts": 5333367153758.986, "dur": 78.162, + "args": { + "External id": 290166,"Record function id": 0, "Ev Idx": 5698 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5333367153909.430, "dur": 45.683, + "args": { + "External id": 290167,"Record function id": 0, "Ev Idx": 5699 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.10)", "pid": 2070552, "tid": 2070552, + "ts": 5333367153964.251, "dur": 18228.710, + "args": { + "External id": 290168,"Record function id": 0, "Ev Idx": 5700 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.10)", "pid": 2070552, "tid": 2070552, + "ts": 5333367153972.179, "dur": 846.125, + "args": { + "External id": 290169,"Record function id": 0, "Ev Idx": 5701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367154052.751, "dur": 8.613, + "args": { + "External id": 290170,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367154075.481, "dur": 39.918, + "args": { + "External id": 290171,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367154080.940, "dur": 2.577, + "args": { + "External id": 290172,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367154088.030, "dur": 0.393, + "args": { + "External id": 290173,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367154090.175, "dur": 0.840, + "args": { + "External id": 290174,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367154092.611, "dur": 0.473, + "args": { + "External id": 290175,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367154095.985, "dur": 0.678, + "args": { + "External id": 290176,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367154098.341, "dur": 0.846, + "args": { + "External id": 290177,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367154100.903, "dur": 4.574, + "args": { + "External id": 290178,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367154106.961, "dur": 0.470, + "args": { + "External id": 290179,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367154109.081, "dur": 0.348, + "args": { + "External id": 290180,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367154126.628, "dur": 58.649, + "args": { + "External id": 290181,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5333367154220.809, "dur": 121.272, + "args": { + "External id": 290182,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367154234.756, "dur": 5.530, + "args": { + "External id": 290183,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5333367154245.036, "dur": 11.355, + "args": { + "External id": 290184,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367154249.640, "dur": 6.287, + "args": { + "External id": 290185,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367154253.605, "dur": 0.813, + "args": { + "External id": 290186,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367154263.493, "dur": 31.903, + "args": { + "External id": 290187,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367154265.582, "dur": 2.917, + "args": { + "External id": 290188,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367154270.075, "dur": 0.467, + "args": { + "External id": 290189,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367154272.154, "dur": 0.390, + "args": { + "External id": 290190,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367154276.145, "dur": 2.183, + "args": { + "External id": 290191,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367154279.828, "dur": 0.264, + "args": { + "External id": 290192,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367154281.960, "dur": 0.431, + "args": { + "External id": 290193,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367154285.330, "dur": 0.358, + "args": { + "External id": 290194,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367154287.294, "dur": 0.524, + "args": { + "External id": 290195,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367154289.238, "dur": 1.714, + "args": { + "External id": 290196,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367154306.000, "dur": 28.070, + "args": { + "External id": 290197,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5333367154396.405, "dur": 327.980, + "args": { + "External id": 290198,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367154429.456, "dur": 289.691, + "args": { + "External id": 290199,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5731, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5333367154439.237, "dur": 273.771, + "args": { + "External id": 290200,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367154748.682, "dur": 2.106, + "args": { + "External id": 290201,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5733, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.10)", "pid": 2070552, "tid": 2070552, + "ts": 5333367154839.286, "dur": 17152.964, + "args": { + "External id": 290202,"Record function id": 0, "Ev Idx": 5734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367154935.827, "dur": 6.117, + "args": { + "External id": 290203,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367154945.576, "dur": 1.129, + "args": { + "External id": 290204,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367154948.270, "dur": 3.165, + "args": { + "External id": 290205,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367154952.928, "dur": 1.245, + "args": { + "External id": 290206,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367154955.531, "dur": 0.935, + "args": { + "External id": 290207,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367154957.518, "dur": 1.139, + "args": { + "External id": 290208,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367154962.255, "dur": 0.842, + "args": { + "External id": 290209,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367154964.764, "dur": 2.245, + "args": { + "External id": 290210,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367154968.445, "dur": 0.825, + "args": { + "External id": 290211,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367154970.603, "dur": 0.635, + "args": { + "External id": 290212,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367154990.769, "dur": 16961.649, + "args": { + "External id": 290213,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367155005.929, "dur": 16938.733, + "args": { + "External id": 290214,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367155028.222, "dur": 14.875, + "args": { + "External id": 290215,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333367155046.992, "dur": 16865.930, + "args": { + "External id": 290216,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367155049.792, "dur": 16862.427, + "args": { + "External id": 290217,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367155055.786, "dur": 5.409, + "args": { + "External id": 290218,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367155062.915, "dur": 16846.509, + "args": { + "External id": 290219,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367172124.141, "dur": 31.214, + "args": { + "External id": 290220,"Sequence number": 1209181, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5752 + } + }, + { + "ph": "s", "id": 51, "pid": 2070552, "tid": 2070552, "ts": 5333367172124.141, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5333367172142.632, "dur": 8.246, + "args": { + "External id": 290221,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367172146.158, "dur": 4.508, + "args": { + "External id": 290222,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5333367172230.582, "dur": 80.616, + "args": { + "External id": 290223,"Record function id": 0, "Ev Idx": 5755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5333367172312.874, "dur": 1058.846, + "args": { + "External id": 290224,"Record function id": 0, "Ev Idx": 5756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367172353.273, "dur": 1005.375, + "args": { + "External id": 290225,"Sequence number": 1209182, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5757 + } + }, + { + "ph": "s", "id": 50, "pid": 2070552, "tid": 2070552, "ts": 5333367172353.273, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367172421.172, "dur": 41.094, + "args": { + "External id": 290226,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367172475.521, "dur": 104.774, + "args": { + "External id": 290227,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367172590.196, "dur": 71.237, + "args": { + "External id": 290228,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367172673.799, "dur": 34.855, + "args": { + "External id": 290229,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367172739.493, "dur": 26.184, + "args": { + "External id": 290230,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367172782.463, "dur": 13.981, + "args": { + "External id": 290231,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5333367172815.580, "dur": 126.628, + "args": { + "External id": 290232,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367172864.299, "dur": 11.517, + "args": { + "External id": 290233,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367172869.609, "dur": 5.322, + "args": { + "External id": 290234,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367172878.378, "dur": 5.140, + "args": { + "External id": 290235,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367172885.067, "dur": 0.904, + "args": { + "External id": 290236,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367172888.515, "dur": 5.174, + "args": { + "External id": 290237,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367172952.459, "dur": 46.920, + "args": { + "External id": 290238,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5333367173029.826, "dur": 26.507, + "args": { + "External id": 290239,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367173064.723, "dur": 40.128, + "args": { + "External id": 290240,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367173110.323, "dur": 33.847, + "args": { + "External id": 290241,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367173183.249, "dur": 27.986, + "args": { + "External id": 290242,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367173219.830, "dur": 38.318, + "args": { + "External id": 290243,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367173277.003, "dur": 16.924, + "args": { + "External id": 290244,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5776 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.10)", "pid": 2070552, "tid": 2070552, + "ts": 5333367173433.393, "dur": 75.204, + "args": { + "External id": 290245,"Record function id": 0, "Ev Idx": 5777 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5333367173582.104, "dur": 85.170, + "args": { + "External id": 290246,"Record function id": 0, "Ev Idx": 5778 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.11)", "pid": 2070552, "tid": 2070552, + "ts": 5333367173678.379, "dur": 18201.374, + "args": { + "External id": 290247,"Record function id": 0, "Ev Idx": 5779 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.11)", "pid": 2070552, "tid": 2070552, + "ts": 5333367173687.213, "dur": 855.010, + "args": { + "External id": 290248,"Record function id": 0, "Ev Idx": 5780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367173768.790, "dur": 8.798, + "args": { + "External id": 290249,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367173792.483, "dur": 38.170, + "args": { + "External id": 290250,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367173798.338, "dur": 2.286, + "args": { + "External id": 290251,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367173805.418, "dur": 0.487, + "args": { + "External id": 290252,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367173807.539, "dur": 0.187, + "args": { + "External id": 290253,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367173809.843, "dur": 0.216, + "args": { + "External id": 290254,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367173813.088, "dur": 0.368, + "args": { + "External id": 290255,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367173815.077, "dur": 0.406, + "args": { + "External id": 290256,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367173816.682, "dur": 3.519, + "args": { + "External id": 290257,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367173821.980, "dur": 0.188, + "args": { + "External id": 290258,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367173823.918, "dur": 0.141, + "args": { + "External id": 290259,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367173842.121, "dur": 40.643, + "args": { + "External id": 290260,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5333367173914.741, "dur": 115.061, + "args": { + "External id": 290261,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367173926.876, "dur": 3.291, + "args": { + "External id": 290262,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5333367173935.132, "dur": 10.703, + "args": { + "External id": 290263,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367173939.808, "dur": 5.634, + "args": { + "External id": 290264,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367173943.631, "dur": 0.576, + "args": { + "External id": 290265,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367173953.280, "dur": 28.263, + "args": { + "External id": 290266,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367173955.524, "dur": 2.177, + "args": { + "External id": 290267,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367173959.057, "dur": 0.357, + "args": { + "External id": 290268,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367173960.604, "dur": 0.342, + "args": { + "External id": 290269,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367173964.248, "dur": 1.641, + "args": { + "External id": 290270,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367173967.187, "dur": 0.181, + "args": { + "External id": 290271,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367173969.141, "dur": 0.228, + "args": { + "External id": 290272,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367173972.327, "dur": 0.432, + "args": { + "External id": 290273,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367173974.022, "dur": 0.151, + "args": { + "External id": 290274,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367173975.525, "dur": 1.888, + "args": { + "External id": 290275,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367173996.757, "dur": 25.131, + "args": { + "External id": 290276,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5333367174082.330, "dur": 364.468, + "args": { + "External id": 290277,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367174123.255, "dur": 318.029, + "args": { + "External id": 290278,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5810, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5333367174133.171, "dur": 302.016, + "args": { + "External id": 290279,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367174471.781, "dur": 2.667, + "args": { + "External id": 290280,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5812, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.11)", "pid": 2070552, "tid": 2070552, + "ts": 5333367174563.843, "dur": 17120.785, + "args": { + "External id": 290281,"Record function id": 0, "Ev Idx": 5813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367174700.020, "dur": 6.626, + "args": { + "External id": 290282,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367174710.499, "dur": 1.081, + "args": { + "External id": 290283,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367174713.802, "dur": 2.004, + "args": { + "External id": 290284,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367174717.424, "dur": 0.891, + "args": { + "External id": 290285,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367174719.529, "dur": 0.764, + "args": { + "External id": 290286,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367174723.602, "dur": 0.707, + "args": { + "External id": 290287,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367174726.071, "dur": 1.031, + "args": { + "External id": 290288,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367174728.563, "dur": 2.159, + "args": { + "External id": 290289,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367174732.346, "dur": 0.898, + "args": { + "External id": 290290,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367174736.652, "dur": 0.860, + "args": { + "External id": 290291,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367174757.036, "dur": 16860.635, + "args": { + "External id": 290292,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367174773.030, "dur": 16836.496, + "args": { + "External id": 290293,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367174792.130, "dur": 14.528, + "args": { + "External id": 290294,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333367174810.851, "dur": 16764.186, + "args": { + "External id": 290295,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367174813.297, "dur": 16761.011, + "args": { + "External id": 290296,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367174819.520, "dur": 6.650, + "args": { + "External id": 290297,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367174828.075, "dur": 16743.215, + "args": { + "External id": 290298,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367191822.264, "dur": 33.305, + "args": { + "External id": 290299,"Sequence number": 1209183, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5831 + } + }, + { + "ph": "s", "id": 49, "pid": 2070552, "tid": 2070552, "ts": 5333367191822.264, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5333367191841.724, "dur": 8.946, + "args": { + "External id": 290300,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367191845.566, "dur": 4.840, + "args": { + "External id": 290301,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5333367191917.888, "dur": 84.471, + "args": { + "External id": 290302,"Record function id": 0, "Ev Idx": 5834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5333367192004.044, "dur": 1081.272, + "args": { + "External id": 290303,"Record function id": 0, "Ev Idx": 5835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367192042.017, "dur": 1029.750, + "args": { + "External id": 290304,"Sequence number": 1209184, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5836 + } + }, + { + "ph": "s", "id": 48, "pid": 2070552, "tid": 2070552, "ts": 5333367192042.017, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367192132.419, "dur": 62.558, + "args": { + "External id": 290305,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367192214.020, "dur": 105.525, + "args": { + "External id": 290306,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367192328.745, "dur": 40.631, + "args": { + "External id": 290307,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367192375.349, "dur": 30.890, + "args": { + "External id": 290308,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367192434.644, "dur": 24.562, + "args": { + "External id": 290309,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367192473.965, "dur": 15.494, + "args": { + "External id": 290310,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5333367192506.623, "dur": 168.083, + "args": { + "External id": 290311,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367192555.761, "dur": 11.802, + "args": { + "External id": 290312,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367192561.270, "dur": 5.486, + "args": { + "External id": 290313,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367192570.227, "dur": 5.028, + "args": { + "External id": 290314,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367192576.638, "dur": 2.986, + "args": { + "External id": 290315,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367192582.130, "dur": 4.934, + "args": { + "External id": 290316,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367192687.702, "dur": 50.655, + "args": { + "External id": 290317,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5333367192769.356, "dur": 25.070, + "args": { + "External id": 290318,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367192802.306, "dur": 40.597, + "args": { + "External id": 290319,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367192850.024, "dur": 34.265, + "args": { + "External id": 290320,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367192909.210, "dur": 23.383, + "args": { + "External id": 290321,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367192938.422, "dur": 33.034, + "args": { + "External id": 290322,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367192988.705, "dur": 18.929, + "args": { + "External id": 290323,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5855 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.11)", "pid": 2070552, "tid": 2070552, + "ts": 5333367193147.544, "dur": 96.266, + "args": { + "External id": 290324,"Record function id": 0, "Ev Idx": 5856 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5333367193320.373, "dur": 46.999, + "args": { + "External id": 290325,"Record function id": 0, "Ev Idx": 5857 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.12)", "pid": 2070552, "tid": 2070552, + "ts": 5333367193377.503, "dur": 18226.740, + "args": { + "External id": 290326,"Record function id": 0, "Ev Idx": 5858 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.12)", "pid": 2070552, "tid": 2070552, + "ts": 5333367193387.440, "dur": 862.710, + "args": { + "External id": 290327,"Record function id": 0, "Ev Idx": 5859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367193467.442, "dur": 8.210, + "args": { + "External id": 290328,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367193489.365, "dur": 37.226, + "args": { + "External id": 290329,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367193494.921, "dur": 2.199, + "args": { + "External id": 290330,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367193501.739, "dur": 0.282, + "args": { + "External id": 290331,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367193503.610, "dur": 0.325, + "args": { + "External id": 290332,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367193505.639, "dur": 0.415, + "args": { + "External id": 290333,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367193509.326, "dur": 0.334, + "args": { + "External id": 290334,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367193511.224, "dur": 0.394, + "args": { + "External id": 290335,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367193513.053, "dur": 3.780, + "args": { + "External id": 290336,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367193518.977, "dur": 0.236, + "args": { + "External id": 290337,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367193520.346, "dur": 0.210, + "args": { + "External id": 290338,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367193537.789, "dur": 41.518, + "args": { + "External id": 290339,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5333367193611.541, "dur": 159.991, + "args": { + "External id": 290340,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367193662.714, "dur": 5.437, + "args": { + "External id": 290341,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5333367193673.968, "dur": 11.402, + "args": { + "External id": 290342,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367193678.657, "dur": 6.288, + "args": { + "External id": 290343,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367193682.603, "dur": 0.694, + "args": { + "External id": 290344,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367193693.105, "dur": 30.578, + "args": { + "External id": 290345,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367193695.642, "dur": 2.583, + "args": { + "External id": 290346,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367193700.021, "dur": 0.282, + "args": { + "External id": 290347,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367193701.578, "dur": 0.261, + "args": { + "External id": 290348,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367193705.299, "dur": 1.395, + "args": { + "External id": 290349,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367193708.154, "dur": 0.131, + "args": { + "External id": 290350,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367193709.823, "dur": 0.157, + "args": { + "External id": 290351,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367193713.563, "dur": 0.156, + "args": { + "External id": 290352,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367193715.934, "dur": 0.153, + "args": { + "External id": 290353,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367193717.393, "dur": 2.039, + "args": { + "External id": 290354,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367193735.606, "dur": 27.466, + "args": { + "External id": 290355,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5333367193826.360, "dur": 311.151, + "args": { + "External id": 290356,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367193859.957, "dur": 272.447, + "args": { + "External id": 290357,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5889, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5333367193870.517, "dur": 254.270, + "args": { + "External id": 290358,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367194162.320, "dur": 2.389, + "args": { + "External id": 290359,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5891, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.12)", "pid": 2070552, "tid": 2070552, + "ts": 5333367194272.710, "dur": 17135.139, + "args": { + "External id": 290360,"Record function id": 0, "Ev Idx": 5892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367194370.603, "dur": 6.305, + "args": { + "External id": 290361,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367194380.170, "dur": 0.861, + "args": { + "External id": 290362,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367194382.617, "dur": 1.635, + "args": { + "External id": 290363,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367194386.041, "dur": 0.693, + "args": { + "External id": 290364,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367194388.071, "dur": 0.526, + "args": { + "External id": 290365,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367194389.692, "dur": 0.728, + "args": { + "External id": 290366,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367194394.002, "dur": 0.945, + "args": { + "External id": 290367,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367194396.536, "dur": 1.813, + "args": { + "External id": 290368,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367194399.869, "dur": 0.849, + "args": { + "External id": 290369,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367194401.991, "dur": 0.747, + "args": { + "External id": 290370,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367194422.911, "dur": 16938.932, + "args": { + "External id": 290371,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367194439.034, "dur": 16914.672, + "args": { + "External id": 290372,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367194460.844, "dur": 14.340, + "args": { + "External id": 290373,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333367194479.388, "dur": 16840.534, + "args": { + "External id": 290374,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367194481.647, "dur": 16837.592, + "args": { + "External id": 290375,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367194487.375, "dur": 5.215, + "args": { + "External id": 290376,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367194494.344, "dur": 16821.528, + "args": { + "External id": 290377,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367211544.172, "dur": 35.579, + "args": { + "External id": 290378,"Sequence number": 1209185, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5910 + } + }, + { + "ph": "s", "id": 47, "pid": 2070552, "tid": 2070552, "ts": 5333367211544.172, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5333367211565.732, "dur": 9.100, + "args": { + "External id": 290379,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367211569.548, "dur": 5.037, + "args": { + "External id": 290380,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5333367211672.870, "dur": 81.229, + "args": { + "External id": 290381,"Record function id": 0, "Ev Idx": 5913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5333367211756.420, "dur": 1084.539, + "args": { + "External id": 290382,"Record function id": 0, "Ev Idx": 5914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367211798.961, "dur": 1028.757, + "args": { + "External id": 290383,"Sequence number": 1209186, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5915 + } + }, + { + "ph": "s", "id": 46, "pid": 2070552, "tid": 2070552, "ts": 5333367211798.961, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367211872.154, "dur": 43.975, + "args": { + "External id": 290384,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367211932.447, "dur": 105.191, + "args": { + "External id": 290385,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367212047.040, "dur": 38.197, + "args": { + "External id": 290386,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367212091.040, "dur": 29.790, + "args": { + "External id": 290387,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367212147.359, "dur": 41.808, + "args": { + "External id": 290388,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367212209.507, "dur": 17.528, + "args": { + "External id": 290389,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5333367212245.586, "dur": 130.144, + "args": { + "External id": 290390,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 5922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367212293.844, "dur": 12.524, + "args": { + "External id": 290391,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 5923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367212299.548, "dur": 5.898, + "args": { + "External id": 290392,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367212309.350, "dur": 4.983, + "args": { + "External id": 290393,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367212315.648, "dur": 2.850, + "args": { + "External id": 290394,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367212321.096, "dur": 5.191, + "args": { + "External id": 290395,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367212385.512, "dur": 48.334, + "args": { + "External id": 290396,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5333367212461.139, "dur": 26.921, + "args": { + "External id": 290397,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367212496.318, "dur": 40.813, + "args": { + "External id": 290398,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367212543.545, "dur": 34.523, + "args": { + "External id": 290399,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 5931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367212602.000, "dur": 71.877, + "args": { + "External id": 290400,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 5932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367212681.659, "dur": 39.591, + "args": { + "External id": 290401,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 5933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367212740.908, "dur": 20.624, + "args": { + "External id": 290402,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 5934 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.12)", "pid": 2070552, "tid": 2070552, + "ts": 5333367212905.605, "dur": 73.061, + "args": { + "External id": 290403,"Record function id": 0, "Ev Idx": 5935 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5333367213051.740, "dur": 46.428, + "args": { + "External id": 290404,"Record function id": 0, "Ev Idx": 5936 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.13)", "pid": 2070552, "tid": 2070552, + "ts": 5333367213107.735, "dur": 18156.343, + "args": { + "External id": 290405,"Record function id": 0, "Ev Idx": 5937 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.13)", "pid": 2070552, "tid": 2070552, + "ts": 5333367213116.213, "dur": 850.475, + "args": { + "External id": 290406,"Record function id": 0, "Ev Idx": 5938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367213214.158, "dur": 8.902, + "args": { + "External id": 290407,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367213237.285, "dur": 37.287, + "args": { + "External id": 290408,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367213242.922, "dur": 2.029, + "args": { + "External id": 290409,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367213249.778, "dur": 0.477, + "args": { + "External id": 290410,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367213251.699, "dur": 0.231, + "args": { + "External id": 290411,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367213253.202, "dur": 0.406, + "args": { + "External id": 290412,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367213256.707, "dur": 0.471, + "args": { + "External id": 290413,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367213258.523, "dur": 0.340, + "args": { + "External id": 290414,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367213260.251, "dur": 3.504, + "args": { + "External id": 290415,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367213265.223, "dur": 0.337, + "args": { + "External id": 290416,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367213267.098, "dur": 0.446, + "args": { + "External id": 290417,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367213286.171, "dur": 42.159, + "args": { + "External id": 290418,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5333367213361.735, "dur": 119.171, + "args": { + "External id": 290419,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 5951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367213374.842, "dur": 3.640, + "args": { + "External id": 290420,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5333367213383.303, "dur": 10.466, + "args": { + "External id": 290421,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367213387.679, "dur": 5.616, + "args": { + "External id": 290422,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 5954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367213391.629, "dur": 0.488, + "args": { + "External id": 290423,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 5955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367213401.103, "dur": 31.593, + "args": { + "External id": 290424,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 5956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367213403.772, "dur": 3.069, + "args": { + "External id": 290425,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367213408.364, "dur": 0.482, + "args": { + "External id": 290426,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367213410.806, "dur": 0.506, + "args": { + "External id": 290427,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367213414.277, "dur": 1.334, + "args": { + "External id": 290428,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367213417.244, "dur": 0.163, + "args": { + "External id": 290429,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367213418.868, "dur": 0.327, + "args": { + "External id": 290430,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367213422.221, "dur": 0.182, + "args": { + "External id": 290431,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367213424.230, "dur": 0.185, + "args": { + "External id": 290432,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367213425.975, "dur": 2.043, + "args": { + "External id": 290433,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 5965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367213446.447, "dur": 26.445, + "args": { + "External id": 290434,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 5966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5333367213535.740, "dur": 336.163, + "args": { + "External id": 290435,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 5967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367213569.033, "dur": 298.011, + "args": { + "External id": 290436,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 5968, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5333367213583.986, "dur": 276.824, + "args": { + "External id": 290437,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 5969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367213895.226, "dur": 2.154, + "args": { + "External id": 290438,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 5970, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.13)", "pid": 2070552, "tid": 2070552, + "ts": 5333367213987.290, "dur": 17079.520, + "args": { + "External id": 290439,"Record function id": 0, "Ev Idx": 5971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367214079.327, "dur": 6.162, + "args": { + "External id": 290440,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 5972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367214088.789, "dur": 0.983, + "args": { + "External id": 290441,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367214091.368, "dur": 2.360, + "args": { + "External id": 290442,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367214095.352, "dur": 0.784, + "args": { + "External id": 290443,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367214097.544, "dur": 0.596, + "args": { + "External id": 290444,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367214099.239, "dur": 0.485, + "args": { + "External id": 290445,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 5977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367214103.158, "dur": 0.514, + "args": { + "External id": 290446,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 5978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367214105.431, "dur": 1.948, + "args": { + "External id": 290447,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367214109.135, "dur": 0.716, + "args": { + "External id": 290448,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367214111.397, "dur": 0.522, + "args": { + "External id": 290449,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 5981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367214131.826, "dur": 16896.348, + "args": { + "External id": 290450,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367214146.622, "dur": 16874.809, + "args": { + "External id": 290451,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 5983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367214164.852, "dur": 32.333, + "args": { + "External id": 290452,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333367214202.032, "dur": 16788.787, + "args": { + "External id": 290453,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 5985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367214204.605, "dur": 16785.677, + "args": { + "External id": 290454,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 5986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367214210.212, "dur": 6.976, + "args": { + "External id": 290455,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 5987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367214218.635, "dur": 16768.739, + "args": { + "External id": 290456,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 5988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367231207.112, "dur": 31.200, + "args": { + "External id": 290457,"Sequence number": 1209187, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 5989 + } + }, + { + "ph": "s", "id": 45, "pid": 2070552, "tid": 2070552, "ts": 5333367231207.112, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5333367231225.769, "dur": 7.762, + "args": { + "External id": 290458,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 5990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367231228.838, "dur": 4.351, + "args": { + "External id": 290459,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 5991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5333367231302.750, "dur": 82.417, + "args": { + "External id": 290460,"Record function id": 0, "Ev Idx": 5992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5333367231386.461, "dur": 1033.292, + "args": { + "External id": 290461,"Record function id": 0, "Ev Idx": 5993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367231424.521, "dur": 982.369, + "args": { + "External id": 290462,"Sequence number": 1209188, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 5994 + } + }, + { + "ph": "s", "id": 44, "pid": 2070552, "tid": 2070552, "ts": 5333367231424.521, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367231488.659, "dur": 44.269, + "args": { + "External id": 290463,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 5995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367231545.365, "dur": 124.003, + "args": { + "External id": 290464,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367231682.145, "dur": 42.965, + "args": { + "External id": 290465,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367231733.124, "dur": 30.772, + "args": { + "External id": 290466,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 5998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367231791.248, "dur": 26.038, + "args": { + "External id": 290467,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 5999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367231834.195, "dur": 13.776, + "args": { + "External id": 290468,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5333367231866.651, "dur": 124.628, + "args": { + "External id": 290469,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367231915.146, "dur": 10.934, + "args": { + "External id": 290470,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367231920.405, "dur": 4.662, + "args": { + "External id": 290471,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367231928.712, "dur": 5.647, + "args": { + "External id": 290472,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367231936.065, "dur": 0.987, + "args": { + "External id": 290473,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367231939.570, "dur": 5.008, + "args": { + "External id": 290474,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367232001.658, "dur": 43.614, + "args": { + "External id": 290475,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5333367232073.438, "dur": 27.940, + "args": { + "External id": 290476,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367232110.505, "dur": 39.714, + "args": { + "External id": 290477,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367232158.004, "dur": 51.833, + "args": { + "External id": 290478,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367232236.628, "dur": 23.942, + "args": { + "External id": 290479,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367232266.136, "dur": 35.103, + "args": { + "External id": 290480,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367232322.137, "dur": 18.055, + "args": { + "External id": 290481,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6013 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.13)", "pid": 2070552, "tid": 2070552, + "ts": 5333367232481.682, "dur": 73.713, + "args": { + "External id": 290482,"Record function id": 0, "Ev Idx": 6014 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5333367232668.813, "dur": 48.551, + "args": { + "External id": 290483,"Record function id": 0, "Ev Idx": 6015 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.14)", "pid": 2070552, "tid": 2070552, + "ts": 5333367232727.284, "dur": 18301.465, + "args": { + "External id": 290484,"Record function id": 0, "Ev Idx": 6016 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.14)", "pid": 2070552, "tid": 2070552, + "ts": 5333367232735.483, "dur": 807.339, + "args": { + "External id": 290485,"Record function id": 0, "Ev Idx": 6017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367232815.154, "dur": 9.192, + "args": { + "External id": 290486,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367232838.354, "dur": 37.470, + "args": { + "External id": 290487,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367232843.830, "dur": 2.260, + "args": { + "External id": 290488,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367232850.492, "dur": 0.239, + "args": { + "External id": 290489,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367232852.251, "dur": 0.238, + "args": { + "External id": 290490,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367232854.601, "dur": 0.159, + "args": { + "External id": 290491,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367232857.998, "dur": 0.195, + "args": { + "External id": 290492,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367232859.985, "dur": 0.334, + "args": { + "External id": 290493,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367232861.587, "dur": 3.129, + "args": { + "External id": 290494,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367232866.490, "dur": 0.375, + "args": { + "External id": 290495,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367232868.088, "dur": 0.342, + "args": { + "External id": 290496,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367232886.763, "dur": 44.402, + "args": { + "External id": 290497,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5333367232963.662, "dur": 110.435, + "args": { + "External id": 290498,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367232976.216, "dur": 3.599, + "args": { + "External id": 290499,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5333367232984.484, "dur": 10.341, + "args": { + "External id": 290500,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367232989.030, "dur": 5.401, + "args": { + "External id": 290501,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367232992.608, "dur": 0.549, + "args": { + "External id": 290502,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367233001.590, "dur": 29.718, + "args": { + "External id": 290503,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367233003.853, "dur": 2.731, + "args": { + "External id": 290504,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367233008.140, "dur": 0.368, + "args": { + "External id": 290505,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367233010.341, "dur": 0.447, + "args": { + "External id": 290506,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367233013.953, "dur": 0.931, + "args": { + "External id": 290507,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367233016.182, "dur": 0.341, + "args": { + "External id": 290508,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367233018.068, "dur": 0.136, + "args": { + "External id": 290509,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367233021.499, "dur": 0.185, + "args": { + "External id": 290510,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367233023.376, "dur": 0.357, + "args": { + "External id": 290511,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367233025.135, "dur": 2.294, + "args": { + "External id": 290512,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367233041.788, "dur": 24.063, + "args": { + "External id": 290513,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5333367233125.542, "dur": 324.884, + "args": { + "External id": 290514,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367233157.185, "dur": 288.239, + "args": { + "External id": 290515,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6047, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5333367233184.296, "dur": 255.414, + "args": { + "External id": 290516,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367233474.748, "dur": 2.279, + "args": { + "External id": 290517,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6049, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.14)", "pid": 2070552, "tid": 2070552, + "ts": 5333367233563.345, "dur": 17270.296, + "args": { + "External id": 290518,"Record function id": 0, "Ev Idx": 6050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367233691.420, "dur": 6.544, + "args": { + "External id": 290519,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367233701.987, "dur": 0.928, + "args": { + "External id": 290520,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367233704.595, "dur": 1.737, + "args": { + "External id": 290521,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367233708.402, "dur": 0.774, + "args": { + "External id": 290522,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367233710.482, "dur": 0.649, + "args": { + "External id": 290523,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367233712.369, "dur": 0.873, + "args": { + "External id": 290524,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367233717.372, "dur": 1.110, + "args": { + "External id": 290525,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367233720.184, "dur": 1.949, + "args": { + "External id": 290526,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367233723.672, "dur": 0.783, + "args": { + "External id": 290527,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367233725.794, "dur": 0.667, + "args": { + "External id": 290528,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367233747.471, "dur": 17044.303, + "args": { + "External id": 290529,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367233763.035, "dur": 17021.085, + "args": { + "External id": 290530,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367233787.796, "dur": 15.083, + "args": { + "External id": 290531,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333367233806.714, "dur": 16945.033, + "args": { + "External id": 290532,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367233809.420, "dur": 16941.627, + "args": { + "External id": 290533,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367233815.718, "dur": 4.852, + "args": { + "External id": 290534,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367233822.362, "dur": 16925.773, + "args": { + "External id": 290535,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367250968.687, "dur": 36.148, + "args": { + "External id": 290536,"Sequence number": 1209189, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6068 + } + }, + { + "ph": "s", "id": 43, "pid": 2070552, "tid": 2070552, "ts": 5333367250968.687, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5333367250991.478, "dur": 8.573, + "args": { + "External id": 290537,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367250995.130, "dur": 4.661, + "args": { + "External id": 290538,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5333367251066.522, "dur": 83.898, + "args": { + "External id": 290539,"Record function id": 0, "Ev Idx": 6071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5333367251151.948, "dur": 1092.561, + "args": { + "External id": 290540,"Record function id": 0, "Ev Idx": 6072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367251208.012, "dur": 1022.212, + "args": { + "External id": 290541,"Sequence number": 1209190, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6073 + } + }, + { + "ph": "s", "id": 42, "pid": 2070552, "tid": 2070552, "ts": 5333367251208.012, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367251276.583, "dur": 44.077, + "args": { + "External id": 290542,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367251332.911, "dur": 108.006, + "args": { + "External id": 290543,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367251454.290, "dur": 39.272, + "args": { + "External id": 290544,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367251499.954, "dur": 30.993, + "args": { + "External id": 290545,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367251557.020, "dur": 23.954, + "args": { + "External id": 290546,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367251597.055, "dur": 15.012, + "args": { + "External id": 290547,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5333367251671.591, "dur": 131.314, + "args": { + "External id": 290548,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367251721.595, "dur": 12.138, + "args": { + "External id": 290549,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367251726.647, "dur": 6.150, + "args": { + "External id": 290550,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367251736.426, "dur": 4.996, + "args": { + "External id": 290551,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367251742.901, "dur": 0.984, + "args": { + "External id": 290552,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367251748.282, "dur": 3.930, + "args": { + "External id": 290553,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367251814.060, "dur": 50.919, + "args": { + "External id": 290554,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5333367251897.267, "dur": 31.122, + "args": { + "External id": 290555,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367251937.417, "dur": 40.756, + "args": { + "External id": 290556,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367251984.181, "dur": 34.955, + "args": { + "External id": 290557,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367252042.015, "dur": 25.847, + "args": { + "External id": 290558,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367252073.171, "dur": 33.038, + "args": { + "External id": 290559,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367252123.266, "dur": 16.655, + "args": { + "External id": 290560,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6092 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.14)", "pid": 2070552, "tid": 2070552, + "ts": 5333367252309.009, "dur": 73.220, + "args": { + "External id": 290561,"Record function id": 0, "Ev Idx": 6093 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5333367252453.803, "dur": 45.150, + "args": { + "External id": 290562,"Record function id": 0, "Ev Idx": 6094 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.15)", "pid": 2070552, "tid": 2070552, + "ts": 5333367252507.661, "dur": 18236.998, + "args": { + "External id": 290563,"Record function id": 0, "Ev Idx": 6095 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.15)", "pid": 2070552, "tid": 2070552, + "ts": 5333367252515.269, "dur": 845.629, + "args": { + "External id": 290564,"Record function id": 0, "Ev Idx": 6096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367252595.483, "dur": 8.155, + "args": { + "External id": 290565,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367252617.236, "dur": 74.853, + "args": { + "External id": 290566,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367252659.697, "dur": 2.511, + "args": { + "External id": 290567,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367252667.461, "dur": 0.420, + "args": { + "External id": 290568,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367252669.623, "dur": 0.409, + "args": { + "External id": 290569,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367252671.604, "dur": 0.510, + "args": { + "External id": 290570,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367252675.535, "dur": 0.213, + "args": { + "External id": 290571,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367252676.989, "dur": 0.660, + "args": { + "External id": 290572,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367252679.097, "dur": 3.018, + "args": { + "External id": 290573,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367252683.667, "dur": 0.325, + "args": { + "External id": 290574,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367252685.608, "dur": 0.370, + "args": { + "External id": 290575,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367252704.472, "dur": 45.323, + "args": { + "External id": 290576,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5333367252785.716, "dur": 122.890, + "args": { + "External id": 290577,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367252798.061, "dur": 4.533, + "args": { + "External id": 290578,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5333367252807.560, "dur": 10.700, + "args": { + "External id": 290579,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367252812.024, "dur": 5.832, + "args": { + "External id": 290580,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367252815.937, "dur": 0.554, + "args": { + "External id": 290581,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367252825.396, "dur": 36.462, + "args": { + "External id": 290582,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367252827.776, "dur": 2.216, + "args": { + "External id": 290583,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367252831.694, "dur": 0.341, + "args": { + "External id": 290584,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367252839.858, "dur": 0.517, + "args": { + "External id": 290585,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367252843.456, "dur": 1.382, + "args": { + "External id": 290586,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367252846.315, "dur": 0.176, + "args": { + "External id": 290587,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367252847.791, "dur": 2.245, + "args": { + "External id": 290588,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367252851.455, "dur": 0.170, + "args": { + "External id": 290589,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367252853.173, "dur": 0.295, + "args": { + "External id": 290590,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367252857.140, "dur": 0.290, + "args": { + "External id": 290591,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367252875.491, "dur": 25.371, + "args": { + "External id": 290592,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5333367252963.599, "dur": 305.111, + "args": { + "External id": 290593,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367252996.819, "dur": 266.816, + "args": { + "External id": 290594,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6126, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5333367253007.038, "dur": 250.761, + "args": { + "External id": 290595,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367253293.033, "dur": 2.528, + "args": { + "External id": 290596,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6128, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.15)", "pid": 2070552, "tid": 2070552, + "ts": 5333367253380.782, "dur": 17142.717, + "args": { + "External id": 290597,"Record function id": 0, "Ev Idx": 6129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367253473.957, "dur": 5.564, + "args": { + "External id": 290598,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367253482.940, "dur": 0.821, + "args": { + "External id": 290599,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367253485.417, "dur": 1.869, + "args": { + "External id": 290600,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367253488.701, "dur": 0.979, + "args": { + "External id": 290601,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367253490.954, "dur": 0.751, + "args": { + "External id": 290602,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367253492.951, "dur": 0.754, + "args": { + "External id": 290603,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367253501.388, "dur": 0.864, + "args": { + "External id": 290604,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367253503.788, "dur": 2.031, + "args": { + "External id": 290605,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367253507.261, "dur": 0.556, + "args": { + "External id": 290606,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367253509.221, "dur": 0.660, + "args": { + "External id": 290607,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367253531.941, "dur": 16947.954, + "args": { + "External id": 290608,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367253547.333, "dur": 16924.665, + "args": { + "External id": 290609,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367253563.877, "dur": 14.106, + "args": { + "External id": 290610,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333367253581.938, "dur": 16856.373, + "args": { + "External id": 290611,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367253584.321, "dur": 16853.230, + "args": { + "External id": 290612,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367253590.047, "dur": 5.504, + "args": { + "External id": 290613,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367253597.070, "dur": 16837.484, + "args": { + "External id": 290614,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367270684.276, "dur": 34.897, + "args": { + "External id": 290615,"Sequence number": 1209191, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6147 + } + }, + { + "ph": "s", "id": 41, "pid": 2070552, "tid": 2070552, "ts": 5333367270684.276, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5333367270705.320, "dur": 9.113, + "args": { + "External id": 290616,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367270709.121, "dur": 4.934, + "args": { + "External id": 290617,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5333367270782.257, "dur": 84.183, + "args": { + "External id": 290618,"Record function id": 0, "Ev Idx": 6150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5333367270868.383, "dur": 1064.044, + "args": { + "External id": 290619,"Record function id": 0, "Ev Idx": 6151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367270909.973, "dur": 1008.965, + "args": { + "External id": 290620,"Sequence number": 1209192, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6152 + } + }, + { + "ph": "s", "id": 40, "pid": 2070552, "tid": 2070552, "ts": 5333367270909.973, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367270978.327, "dur": 44.088, + "args": { + "External id": 290621,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367271034.457, "dur": 107.879, + "args": { + "External id": 290622,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367271151.995, "dur": 54.874, + "args": { + "External id": 290623,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367271218.765, "dur": 33.129, + "args": { + "External id": 290624,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367271277.743, "dur": 24.949, + "args": { + "External id": 290625,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367271319.041, "dur": 13.700, + "args": { + "External id": 290626,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5333367271352.386, "dur": 124.997, + "args": { + "External id": 290627,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367271400.552, "dur": 11.394, + "args": { + "External id": 290628,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367271405.765, "dur": 5.269, + "args": { + "External id": 290629,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367271414.426, "dur": 5.379, + "args": { + "External id": 290630,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367271421.155, "dur": 1.019, + "args": { + "External id": 290631,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367271424.591, "dur": 3.696, + "args": { + "External id": 290632,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367271486.607, "dur": 44.538, + "args": { + "External id": 290633,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5333367271558.986, "dur": 25.490, + "args": { + "External id": 290634,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367271592.695, "dur": 77.755, + "args": { + "External id": 290635,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367271683.888, "dur": 38.511, + "args": { + "External id": 290636,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367271747.638, "dur": 24.899, + "args": { + "External id": 290637,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367271778.196, "dur": 32.879, + "args": { + "External id": 290638,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367271830.172, "dur": 17.047, + "args": { + "External id": 290639,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6171 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.15)", "pid": 2070552, "tid": 2070552, + "ts": 5333367271996.118, "dur": 77.178, + "args": { + "External id": 290640,"Record function id": 0, "Ev Idx": 6172 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5333367272144.808, "dur": 66.877, + "args": { + "External id": 290641,"Record function id": 0, "Ev Idx": 6173 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.16)", "pid": 2070552, "tid": 2070552, + "ts": 5333367272222.784, "dur": 18246.441, + "args": { + "External id": 290642,"Record function id": 0, "Ev Idx": 6174 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.16)", "pid": 2070552, "tid": 2070552, + "ts": 5333367272232.152, "dur": 807.481, + "args": { + "External id": 290643,"Record function id": 0, "Ev Idx": 6175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367272314.392, "dur": 9.026, + "args": { + "External id": 290644,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367272336.890, "dur": 37.155, + "args": { + "External id": 290645,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367272342.504, "dur": 2.128, + "args": { + "External id": 290646,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367272349.336, "dur": 0.253, + "args": { + "External id": 290647,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367272351.294, "dur": 0.495, + "args": { + "External id": 290648,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367272354.033, "dur": 0.274, + "args": { + "External id": 290649,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367272357.618, "dur": 0.201, + "args": { + "External id": 290650,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367272359.319, "dur": 0.492, + "args": { + "External id": 290651,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367272361.335, "dur": 3.031, + "args": { + "External id": 290652,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367272365.924, "dur": 0.318, + "args": { + "External id": 290653,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367272367.653, "dur": 0.191, + "args": { + "External id": 290654,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367272386.097, "dur": 43.230, + "args": { + "External id": 290655,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5333367272462.498, "dur": 107.172, + "args": { + "External id": 290656,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367272473.871, "dur": 3.873, + "args": { + "External id": 290657,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5333367272482.873, "dur": 10.316, + "args": { + "External id": 290658,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367272487.141, "dur": 5.610, + "args": { + "External id": 290659,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367272491.003, "dur": 0.640, + "args": { + "External id": 290660,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367272500.833, "dur": 27.794, + "args": { + "External id": 290661,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367272502.954, "dur": 2.084, + "args": { + "External id": 290662,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367272506.445, "dur": 0.366, + "args": { + "External id": 290663,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367272508.104, "dur": 0.333, + "args": { + "External id": 290664,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367272512.122, "dur": 1.450, + "args": { + "External id": 290665,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367272515.014, "dur": 0.339, + "args": { + "External id": 290666,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367272516.841, "dur": 0.157, + "args": { + "External id": 290667,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367272519.041, "dur": 0.488, + "args": { + "External id": 290668,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367272521.141, "dur": 0.157, + "args": { + "External id": 290669,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367272522.530, "dur": 2.147, + "args": { + "External id": 290670,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367272538.298, "dur": 24.127, + "args": { + "External id": 290671,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5333367272657.973, "dur": 290.526, + "args": { + "External id": 290672,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367272692.065, "dur": 252.295, + "args": { + "External id": 290673,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6205, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5333367272703.582, "dur": 235.397, + "args": { + "External id": 290674,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367272970.640, "dur": 2.200, + "args": { + "External id": 290675,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6207, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.16)", "pid": 2070552, "tid": 2070552, + "ts": 5333367273060.907, "dur": 17206.029, + "args": { + "External id": 290676,"Record function id": 0, "Ev Idx": 6208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367273155.155, "dur": 5.844, + "args": { + "External id": 290677,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367273164.449, "dur": 1.118, + "args": { + "External id": 290678,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367273185.243, "dur": 2.761, + "args": { + "External id": 290679,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367273190.420, "dur": 0.821, + "args": { + "External id": 290680,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367273192.640, "dur": 0.775, + "args": { + "External id": 290681,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367273194.696, "dur": 0.694, + "args": { + "External id": 290682,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367273199.371, "dur": 0.659, + "args": { + "External id": 290683,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367273201.781, "dur": 2.095, + "args": { + "External id": 290684,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367273205.482, "dur": 0.630, + "args": { + "External id": 290685,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367273207.619, "dur": 0.599, + "args": { + "External id": 290686,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367273240.674, "dur": 16981.071, + "args": { + "External id": 290687,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367273256.599, "dur": 16957.234, + "args": { + "External id": 290688,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367273274.395, "dur": 13.739, + "args": { + "External id": 290689,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333367273292.066, "dur": 16886.799, + "args": { + "External id": 290690,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367273294.345, "dur": 16883.500, + "args": { + "External id": 290691,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367273300.457, "dur": 5.696, + "args": { + "External id": 290692,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367273307.872, "dur": 16857.417, + "args": { + "External id": 290693,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367290410.158, "dur": 33.126, + "args": { + "External id": 290694,"Sequence number": 1209193, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6226 + } + }, + { + "ph": "s", "id": 39, "pid": 2070552, "tid": 2070552, "ts": 5333367290410.158, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5333367290429.790, "dur": 8.794, + "args": { + "External id": 290695,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367290433.605, "dur": 4.717, + "args": { + "External id": 290696,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5333367290509.045, "dur": 82.442, + "args": { + "External id": 290697,"Record function id": 0, "Ev Idx": 6229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5333367290593.255, "dur": 1104.674, + "args": { + "External id": 290698,"Record function id": 0, "Ev Idx": 6230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367290680.318, "dur": 1002.331, + "args": { + "External id": 290699,"Sequence number": 1209194, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6231 + } + }, + { + "ph": "s", "id": 38, "pid": 2070552, "tid": 2070552, "ts": 5333367290680.318, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367290750.193, "dur": 43.758, + "args": { + "External id": 290700,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367290806.982, "dur": 107.285, + "args": { + "External id": 290701,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367290923.129, "dur": 37.305, + "args": { + "External id": 290702,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367290969.067, "dur": 30.304, + "args": { + "External id": 290703,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367291024.141, "dur": 24.227, + "args": { + "External id": 290704,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367291065.645, "dur": 14.705, + "args": { + "External id": 290705,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5333367291100.428, "dur": 146.387, + "args": { + "External id": 290706,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367291149.001, "dur": 11.373, + "args": { + "External id": 290707,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367291154.463, "dur": 5.078, + "args": { + "External id": 290708,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367291163.098, "dur": 21.070, + "args": { + "External id": 290709,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367291186.790, "dur": 1.226, + "args": { + "External id": 290710,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367291190.645, "dur": 4.177, + "args": { + "External id": 290711,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367291257.782, "dur": 49.064, + "args": { + "External id": 290712,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5333367291337.250, "dur": 25.623, + "args": { + "External id": 290713,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367291371.717, "dur": 40.801, + "args": { + "External id": 290714,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367291420.869, "dur": 34.592, + "args": { + "External id": 290715,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367291477.455, "dur": 23.871, + "args": { + "External id": 290716,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367291506.874, "dur": 32.900, + "args": { + "External id": 290717,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367291558.987, "dur": 17.053, + "args": { + "External id": 290718,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6250 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.16)", "pid": 2070552, "tid": 2070552, + "ts": 5333367291762.704, "dur": 79.717, + "args": { + "External id": 290719,"Record function id": 0, "Ev Idx": 6251 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5333367291915.354, "dur": 45.608, + "args": { + "External id": 290720,"Record function id": 0, "Ev Idx": 6252 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.17)", "pid": 2070552, "tid": 2070552, + "ts": 5333367291970.299, "dur": 18342.021, + "args": { + "External id": 290721,"Record function id": 0, "Ev Idx": 6253 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.17)", "pid": 2070552, "tid": 2070552, + "ts": 5333367291977.582, "dur": 894.903, + "args": { + "External id": 290722,"Record function id": 0, "Ev Idx": 6254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367292057.227, "dur": 8.807, + "args": { + "External id": 290723,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367292079.814, "dur": 34.787, + "args": { + "External id": 290724,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367292085.257, "dur": 2.139, + "args": { + "External id": 290725,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367292091.692, "dur": 0.260, + "args": { + "External id": 290726,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367292093.423, "dur": 0.307, + "args": { + "External id": 290727,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367292095.025, "dur": 0.258, + "args": { + "External id": 290728,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367292098.152, "dur": 0.378, + "args": { + "External id": 290729,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367292100.053, "dur": 0.486, + "args": { + "External id": 290730,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367292102.459, "dur": 2.887, + "args": { + "External id": 290731,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367292106.869, "dur": 0.308, + "args": { + "External id": 290732,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367292108.596, "dur": 0.173, + "args": { + "External id": 290733,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367292126.093, "dur": 56.982, + "args": { + "External id": 290734,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5333367292219.058, "dur": 118.288, + "args": { + "External id": 290735,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367292230.917, "dur": 5.133, + "args": { + "External id": 290736,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5333367292241.336, "dur": 10.704, + "args": { + "External id": 290737,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367292245.764, "dur": 5.818, + "args": { + "External id": 290738,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367292249.536, "dur": 0.573, + "args": { + "External id": 290739,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367292259.171, "dur": 28.595, + "args": { + "External id": 290740,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367292261.567, "dur": 2.219, + "args": { + "External id": 290741,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367292265.304, "dur": 0.227, + "args": { + "External id": 290742,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367292266.995, "dur": 0.242, + "args": { + "External id": 290743,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367292270.857, "dur": 1.160, + "args": { + "External id": 290744,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367292273.147, "dur": 0.197, + "args": { + "External id": 290745,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367292274.735, "dur": 0.186, + "args": { + "External id": 290746,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367292278.557, "dur": 0.141, + "args": { + "External id": 290747,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367292280.237, "dur": 0.216, + "args": { + "External id": 290748,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367292281.825, "dur": 2.283, + "args": { + "External id": 290749,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367292302.239, "dur": 27.414, + "args": { + "External id": 290750,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5333367292390.646, "dur": 380.961, + "args": { + "External id": 290751,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367292423.444, "dur": 342.858, + "args": { + "External id": 290752,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6284, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5333367292433.437, "dur": 326.932, + "args": { + "External id": 290753,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367292797.664, "dur": 2.407, + "args": { + "External id": 290754,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6286, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.17)", "pid": 2070552, "tid": 2070552, + "ts": 5333367292894.427, "dur": 17197.967, + "args": { + "External id": 290755,"Record function id": 0, "Ev Idx": 6287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367292997.002, "dur": 6.748, + "args": { + "External id": 290756,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367293007.711, "dur": 0.857, + "args": { + "External id": 290757,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367293024.515, "dur": 2.226, + "args": { + "External id": 290758,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367293030.404, "dur": 0.666, + "args": { + "External id": 290759,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367293032.524, "dur": 0.793, + "args": { + "External id": 290760,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367293034.641, "dur": 0.701, + "args": { + "External id": 290761,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367293037.186, "dur": 0.563, + "args": { + "External id": 290762,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367293041.858, "dur": 1.805, + "args": { + "External id": 290763,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367293044.999, "dur": 0.735, + "args": { + "External id": 290764,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367293047.150, "dur": 0.752, + "args": { + "External id": 290765,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367293069.077, "dur": 16978.207, + "args": { + "External id": 290766,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367293087.631, "dur": 16951.689, + "args": { + "External id": 290767,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367293103.562, "dur": 13.774, + "args": { + "External id": 290768,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333367293123.139, "dur": 16881.671, + "args": { + "External id": 290769,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367293125.435, "dur": 16878.358, + "args": { + "External id": 290770,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367293131.499, "dur": 5.413, + "args": { + "External id": 290771,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367293138.463, "dur": 16862.452, + "args": { + "External id": 290772,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367310250.731, "dur": 35.092, + "args": { + "External id": 290773,"Sequence number": 1209195, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6305 + } + }, + { + "ph": "s", "id": 37, "pid": 2070552, "tid": 2070552, "ts": 5333367310250.731, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5333367310271.175, "dur": 9.843, + "args": { + "External id": 290774,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367310275.229, "dur": 5.328, + "args": { + "External id": 290775,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5333367310351.087, "dur": 80.055, + "args": { + "External id": 290776,"Record function id": 0, "Ev Idx": 6308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5333367310432.631, "dur": 1062.086, + "args": { + "External id": 290777,"Record function id": 0, "Ev Idx": 6309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367310472.217, "dur": 1008.993, + "args": { + "External id": 290778,"Sequence number": 1209196, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6310 + } + }, + { + "ph": "s", "id": 36, "pid": 2070552, "tid": 2070552, "ts": 5333367310472.217, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367310538.682, "dur": 43.482, + "args": { + "External id": 290779,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367310593.904, "dur": 133.466, + "args": { + "External id": 290780,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367310741.727, "dur": 39.496, + "args": { + "External id": 290781,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367310789.729, "dur": 30.738, + "args": { + "External id": 290782,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367310847.216, "dur": 27.947, + "args": { + "External id": 290783,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367310893.529, "dur": 13.535, + "args": { + "External id": 290784,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5333367310926.378, "dur": 124.506, + "args": { + "External id": 290785,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367310973.973, "dur": 11.263, + "args": { + "External id": 290786,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367310979.018, "dur": 5.442, + "args": { + "External id": 290787,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367310987.862, "dur": 5.428, + "args": { + "External id": 290788,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367310994.647, "dur": 1.004, + "args": { + "External id": 290789,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367310998.011, "dur": 3.595, + "args": { + "External id": 290790,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367311061.161, "dur": 45.034, + "args": { + "External id": 290791,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5333367311135.090, "dur": 26.494, + "args": { + "External id": 290792,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367311189.562, "dur": 49.529, + "args": { + "External id": 290793,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367311248.415, "dur": 34.506, + "args": { + "External id": 290794,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367311305.600, "dur": 24.826, + "args": { + "External id": 290795,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367311336.317, "dur": 33.064, + "args": { + "External id": 290796,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367311391.509, "dur": 18.393, + "args": { + "External id": 290797,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6329 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.17)", "pid": 2070552, "tid": 2070552, + "ts": 5333367311557.763, "dur": 115.263, + "args": { + "External id": 290798,"Record function id": 0, "Ev Idx": 6330 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5333367311750.892, "dur": 45.991, + "args": { + "External id": 290799,"Record function id": 0, "Ev Idx": 6331 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.18)", "pid": 2070552, "tid": 2070552, + "ts": 5333367311806.502, "dur": 18222.325, + "args": { + "External id": 290800,"Record function id": 0, "Ev Idx": 6332 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.18)", "pid": 2070552, "tid": 2070552, + "ts": 5333367311815.325, "dur": 856.573, + "args": { + "External id": 290801,"Record function id": 0, "Ev Idx": 6333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367311896.800, "dur": 9.162, + "args": { + "External id": 290802,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367311920.364, "dur": 36.871, + "args": { + "External id": 290803,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367311925.764, "dur": 2.263, + "args": { + "External id": 290804,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367311932.678, "dur": 0.291, + "args": { + "External id": 290805,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367311934.742, "dur": 0.218, + "args": { + "External id": 290806,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367311936.429, "dur": 0.230, + "args": { + "External id": 290807,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367311940.060, "dur": 0.314, + "args": { + "External id": 290808,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367311941.980, "dur": 0.349, + "args": { + "External id": 290809,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367311943.887, "dur": 3.542, + "args": { + "External id": 290810,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367311948.980, "dur": 0.181, + "args": { + "External id": 290811,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367311950.626, "dur": 0.323, + "args": { + "External id": 290812,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367311968.892, "dur": 45.124, + "args": { + "External id": 290813,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5333367312046.386, "dur": 153.859, + "args": { + "External id": 290814,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367312057.003, "dur": 3.989, + "args": { + "External id": 290815,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5333367312065.720, "dur": 35.584, + "args": { + "External id": 290816,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367312094.286, "dur": 6.520, + "args": { + "External id": 290817,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367312099.085, "dur": 0.456, + "args": { + "External id": 290818,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367312108.177, "dur": 28.025, + "args": { + "External id": 290819,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367312110.289, "dur": 2.229, + "args": { + "External id": 290820,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367312114.052, "dur": 0.206, + "args": { + "External id": 290821,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367312115.584, "dur": 0.210, + "args": { + "External id": 290822,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367312119.284, "dur": 1.564, + "args": { + "External id": 290823,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367312122.488, "dur": 0.345, + "args": { + "External id": 290824,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367312124.369, "dur": 0.158, + "args": { + "External id": 290825,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367312127.423, "dur": 0.191, + "args": { + "External id": 290826,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367312129.342, "dur": 0.145, + "args": { + "External id": 290827,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367312130.663, "dur": 2.003, + "args": { + "External id": 290828,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367312147.769, "dur": 43.160, + "args": { + "External id": 290829,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5333367312256.950, "dur": 291.863, + "args": { + "External id": 290830,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367312290.430, "dur": 253.882, + "args": { + "External id": 290831,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6363, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5333367312303.555, "dur": 235.746, + "args": { + "External id": 290832,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367312570.410, "dur": 2.347, + "args": { + "External id": 290833,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6365, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.18)", "pid": 2070552, "tid": 2070552, + "ts": 5333367312695.710, "dur": 17135.716, + "args": { + "External id": 290834,"Record function id": 0, "Ev Idx": 6366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367312800.170, "dur": 6.178, + "args": { + "External id": 290835,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367312809.677, "dur": 1.080, + "args": { + "External id": 290836,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367312812.389, "dur": 1.883, + "args": { + "External id": 290837,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367312815.949, "dur": 0.830, + "args": { + "External id": 290838,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367312818.272, "dur": 0.912, + "args": { + "External id": 290839,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367312820.430, "dur": 0.735, + "args": { + "External id": 290840,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367312824.756, "dur": 0.888, + "args": { + "External id": 290841,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367312827.052, "dur": 1.423, + "args": { + "External id": 290842,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367312830.032, "dur": 0.687, + "args": { + "External id": 290843,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367312832.536, "dur": 0.466, + "args": { + "External id": 290844,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367312852.574, "dur": 16936.566, + "args": { + "External id": 290845,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367312868.253, "dur": 16912.852, + "args": { + "External id": 290846,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367312889.234, "dur": 13.457, + "args": { + "External id": 290847,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333367312906.480, "dur": 16841.591, + "args": { + "External id": 290848,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367312908.958, "dur": 16838.383, + "args": { + "External id": 290849,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367312914.627, "dur": 4.903, + "args": { + "External id": 290850,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367312921.307, "dur": 16823.081, + "args": { + "External id": 290851,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367329972.261, "dur": 32.669, + "args": { + "External id": 290852,"Sequence number": 1209197, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6384 + } + }, + { + "ph": "s", "id": 35, "pid": 2070552, "tid": 2070552, "ts": 5333367329972.261, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5333367329991.107, "dur": 8.843, + "args": { + "External id": 290853,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367329994.838, "dur": 4.791, + "args": { + "External id": 290854,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5333367330065.393, "dur": 83.490, + "args": { + "External id": 290855,"Record function id": 0, "Ev Idx": 6387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5333367330150.648, "dur": 1096.180, + "args": { + "External id": 290856,"Record function id": 0, "Ev Idx": 6388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367330206.915, "dur": 1024.587, + "args": { + "External id": 290857,"Sequence number": 1209198, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6389 + } + }, + { + "ph": "s", "id": 34, "pid": 2070552, "tid": 2070552, "ts": 5333367330206.915, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367330273.633, "dur": 44.487, + "args": { + "External id": 290858,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367330330.822, "dur": 106.494, + "args": { + "External id": 290859,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367330446.377, "dur": 37.716, + "args": { + "External id": 290860,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367330492.398, "dur": 30.620, + "args": { + "External id": 290861,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367330547.109, "dur": 24.831, + "args": { + "External id": 290862,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367330590.314, "dur": 14.189, + "args": { + "External id": 290863,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5333367330669.091, "dur": 131.618, + "args": { + "External id": 290864,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367330720.417, "dur": 12.808, + "args": { + "External id": 290865,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367330725.806, "dur": 6.383, + "args": { + "External id": 290866,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367330735.836, "dur": 5.349, + "args": { + "External id": 290867,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367330742.451, "dur": 1.008, + "args": { + "External id": 290868,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367330745.983, "dur": 3.224, + "args": { + "External id": 290869,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367330812.469, "dur": 52.151, + "args": { + "External id": 290870,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5333367330896.484, "dur": 29.121, + "args": { + "External id": 290871,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367330933.921, "dur": 42.355, + "args": { + "External id": 290872,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367330984.444, "dur": 33.521, + "args": { + "External id": 290873,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367331040.274, "dur": 26.236, + "args": { + "External id": 290874,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367331072.071, "dur": 34.378, + "args": { + "External id": 290875,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367331126.445, "dur": 17.125, + "args": { + "External id": 290876,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6408 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.18)", "pid": 2070552, "tid": 2070552, + "ts": 5333367331312.420, "dur": 77.053, + "args": { + "External id": 290877,"Record function id": 0, "Ev Idx": 6409 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5333367331461.328, "dur": 44.821, + "args": { + "External id": 290878,"Record function id": 0, "Ev Idx": 6410 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.19)", "pid": 2070552, "tid": 2070552, + "ts": 5333367331515.647, "dur": 18237.471, + "args": { + "External id": 290879,"Record function id": 0, "Ev Idx": 6411 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.19)", "pid": 2070552, "tid": 2070552, + "ts": 5333367331523.093, "dur": 831.271, + "args": { + "External id": 290880,"Record function id": 0, "Ev Idx": 6412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367331605.026, "dur": 8.416, + "args": { + "External id": 290881,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367331662.386, "dur": 36.042, + "args": { + "External id": 290882,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367331667.995, "dur": 2.513, + "args": { + "External id": 290883,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367331675.715, "dur": 0.362, + "args": { + "External id": 290884,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367331677.856, "dur": 0.359, + "args": { + "External id": 290885,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367331679.721, "dur": 0.451, + "args": { + "External id": 290886,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367331682.979, "dur": 0.212, + "args": { + "External id": 290887,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367331684.489, "dur": 0.217, + "args": { + "External id": 290888,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367331685.957, "dur": 3.019, + "args": { + "External id": 290889,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367331690.493, "dur": 0.204, + "args": { + "External id": 290890,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367331692.351, "dur": 0.176, + "args": { + "External id": 290891,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367331709.700, "dur": 44.845, + "args": { + "External id": 290892,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5333367331789.006, "dur": 113.988, + "args": { + "External id": 290893,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367331800.246, "dur": 4.430, + "args": { + "External id": 290894,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5333367331809.719, "dur": 10.443, + "args": { + "External id": 290895,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367331814.209, "dur": 5.522, + "args": { + "External id": 290896,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367331818.082, "dur": 0.444, + "args": { + "External id": 290897,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367331826.947, "dur": 28.098, + "args": { + "External id": 290898,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367331829.102, "dur": 1.825, + "args": { + "External id": 290899,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367331832.456, "dur": 0.521, + "args": { + "External id": 290900,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367331834.652, "dur": 0.564, + "args": { + "External id": 290901,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367331838.159, "dur": 1.535, + "args": { + "External id": 290902,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367331841.124, "dur": 0.583, + "args": { + "External id": 290903,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367331843.208, "dur": 0.308, + "args": { + "External id": 290904,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367331846.203, "dur": 0.146, + "args": { + "External id": 290905,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367331847.704, "dur": 0.268, + "args": { + "External id": 290906,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367331849.218, "dur": 2.194, + "args": { + "External id": 290907,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367331870.417, "dur": 24.560, + "args": { + "External id": 290908,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5333367331958.536, "dur": 305.436, + "args": { + "External id": 290909,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367331990.260, "dur": 268.636, + "args": { + "External id": 290910,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6442, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5333367332000.244, "dur": 252.515, + "args": { + "External id": 290911,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367332287.869, "dur": 2.258, + "args": { + "External id": 290912,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6444, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.19)", "pid": 2070552, "tid": 2070552, + "ts": 5333367332375.472, "dur": 17146.421, + "args": { + "External id": 290913,"Record function id": 0, "Ev Idx": 6445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367332473.448, "dur": 5.668, + "args": { + "External id": 290914,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367332482.287, "dur": 1.222, + "args": { + "External id": 290915,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367332485.370, "dur": 2.161, + "args": { + "External id": 290916,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367332489.143, "dur": 0.858, + "args": { + "External id": 290917,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367332491.219, "dur": 0.984, + "args": { + "External id": 290918,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367332493.461, "dur": 0.766, + "args": { + "External id": 290919,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367332498.090, "dur": 0.746, + "args": { + "External id": 290920,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367332500.355, "dur": 1.892, + "args": { + "External id": 290921,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367332503.783, "dur": 0.553, + "args": { + "External id": 290922,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367332505.896, "dur": 0.705, + "args": { + "External id": 290923,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367332527.342, "dur": 16950.487, + "args": { + "External id": 290924,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367332543.338, "dur": 16927.148, + "args": { + "External id": 290925,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367332566.942, "dur": 14.684, + "args": { + "External id": 290926,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333367332585.692, "dur": 16851.931, + "args": { + "External id": 290927,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367332588.230, "dur": 16848.642, + "args": { + "External id": 290928,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367332595.842, "dur": 6.955, + "args": { + "External id": 290929,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367332604.189, "dur": 16829.402, + "args": { + "External id": 290930,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367349690.544, "dur": 36.054, + "args": { + "External id": 290931,"Sequence number": 1209199, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6463 + } + }, + { + "ph": "s", "id": 33, "pid": 2070552, "tid": 2070552, "ts": 5333367349690.544, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5333367349712.247, "dur": 9.648, + "args": { + "External id": 290932,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367349716.206, "dur": 5.294, + "args": { + "External id": 290933,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5333367349790.428, "dur": 80.859, + "args": { + "External id": 290934,"Record function id": 0, "Ev Idx": 6466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5333367349872.893, "dur": 1065.148, + "args": { + "External id": 290935,"Record function id": 0, "Ev Idx": 6467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367349912.442, "dur": 1012.297, + "args": { + "External id": 290936,"Sequence number": 1209200, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6468 + } + }, + { + "ph": "s", "id": 32, "pid": 2070552, "tid": 2070552, "ts": 5333367349912.442, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367349982.280, "dur": 45.378, + "args": { + "External id": 290937,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367350040.213, "dur": 102.912, + "args": { + "External id": 290938,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367350153.232, "dur": 54.473, + "args": { + "External id": 290939,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367350219.682, "dur": 32.186, + "args": { + "External id": 290940,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367350278.342, "dur": 26.376, + "args": { + "External id": 290941,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367350321.917, "dur": 13.429, + "args": { + "External id": 290942,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5333367350353.818, "dur": 127.170, + "args": { + "External id": 290943,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367350403.849, "dur": 11.808, + "args": { + "External id": 290944,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367350409.411, "dur": 5.424, + "args": { + "External id": 290945,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367350418.261, "dur": 5.095, + "args": { + "External id": 290946,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367350424.665, "dur": 1.122, + "args": { + "External id": 290947,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367350428.057, "dur": 3.889, + "args": { + "External id": 290948,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367350491.709, "dur": 44.992, + "args": { + "External id": 290949,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5333367350565.257, "dur": 26.437, + "args": { + "External id": 290950,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367350599.690, "dur": 79.388, + "args": { + "External id": 290951,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367350690.948, "dur": 37.406, + "args": { + "External id": 290952,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367350752.249, "dur": 24.080, + "args": { + "External id": 290953,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367350781.868, "dur": 34.102, + "args": { + "External id": 290954,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367350835.693, "dur": 18.295, + "args": { + "External id": 290955,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6487 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.19)", "pid": 2070552, "tid": 2070552, + "ts": 5333367351000.977, "dur": 74.392, + "args": { + "External id": 290956,"Record function id": 0, "Ev Idx": 6488 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5333367351147.841, "dur": 65.380, + "args": { + "External id": 290957,"Record function id": 0, "Ev Idx": 6489 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.20)", "pid": 2070552, "tid": 2070552, + "ts": 5333367351224.014, "dur": 18246.119, + "args": { + "External id": 290958,"Record function id": 0, "Ev Idx": 6490 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.20)", "pid": 2070552, "tid": 2070552, + "ts": 5333367351234.014, "dur": 805.272, + "args": { + "External id": 290959,"Record function id": 0, "Ev Idx": 6491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367351314.977, "dur": 9.511, + "args": { + "External id": 290960,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367351339.252, "dur": 34.713, + "args": { + "External id": 290961,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367351344.838, "dur": 2.173, + "args": { + "External id": 290962,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367351351.573, "dur": 0.292, + "args": { + "External id": 290963,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367351353.626, "dur": 0.347, + "args": { + "External id": 290964,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367351355.439, "dur": 0.457, + "args": { + "External id": 290965,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367351358.833, "dur": 0.251, + "args": { + "External id": 290966,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367351360.541, "dur": 0.389, + "args": { + "External id": 290967,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367351362.712, "dur": 2.983, + "args": { + "External id": 290968,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367351367.108, "dur": 0.195, + "args": { + "External id": 290969,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367351368.518, "dur": 0.156, + "args": { + "External id": 290970,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367351385.313, "dur": 43.116, + "args": { + "External id": 290971,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5333367351460.418, "dur": 113.567, + "args": { + "External id": 290972,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367351470.959, "dur": 4.250, + "args": { + "External id": 290973,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5333367351479.829, "dur": 10.486, + "args": { + "External id": 290974,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367351484.348, "dur": 5.542, + "args": { + "External id": 290975,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367351488.017, "dur": 0.632, + "args": { + "External id": 290976,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367351497.567, "dur": 31.081, + "args": { + "External id": 290977,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367351503.233, "dur": 2.022, + "args": { + "External id": 290978,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367351506.563, "dur": 0.417, + "args": { + "External id": 290979,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367351508.465, "dur": 0.317, + "args": { + "External id": 290980,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367351512.756, "dur": 1.077, + "args": { + "External id": 290981,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367351514.994, "dur": 0.330, + "args": { + "External id": 290982,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367351516.841, "dur": 0.279, + "args": { + "External id": 290983,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367351519.693, "dur": 0.194, + "args": { + "External id": 290984,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367351520.998, "dur": 0.157, + "args": { + "External id": 290985,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367351522.706, "dur": 2.029, + "args": { + "External id": 290986,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367351541.540, "dur": 24.613, + "args": { + "External id": 290987,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5333367351663.267, "dur": 285.879, + "args": { + "External id": 290988,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367351694.604, "dur": 250.158, + "args": { + "External id": 290989,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6521, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5333367351705.496, "dur": 233.232, + "args": { + "External id": 290990,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367351970.516, "dur": 2.344, + "args": { + "External id": 290991,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6523, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.20)", "pid": 2070552, "tid": 2070552, + "ts": 5333367352060.175, "dur": 17207.402, + "args": { + "External id": 290992,"Record function id": 0, "Ev Idx": 6524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367352157.806, "dur": 5.675, + "args": { + "External id": 290993,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367352184.586, "dur": 1.743, + "args": { + "External id": 290994,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367352189.161, "dur": 2.323, + "args": { + "External id": 290995,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367352193.194, "dur": 0.769, + "args": { + "External id": 290996,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367352195.563, "dur": 0.853, + "args": { + "External id": 290997,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367352197.648, "dur": 0.861, + "args": { + "External id": 290998,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367352202.058, "dur": 0.880, + "args": { + "External id": 290999,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367352204.203, "dur": 2.041, + "args": { + "External id": 291000,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367352207.997, "dur": 0.798, + "args": { + "External id": 291001,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367352210.466, "dur": 0.539, + "args": { + "External id": 291002,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367352232.568, "dur": 16991.055, + "args": { + "External id": 291003,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367352248.282, "dur": 16967.981, + "args": { + "External id": 291004,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367352270.400, "dur": 14.123, + "args": { + "External id": 291005,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333367352288.471, "dur": 16893.793, + "args": { + "External id": 291006,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367352290.826, "dur": 16890.853, + "args": { + "External id": 291007,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367352297.023, "dur": 6.801, + "args": { + "External id": 291008,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367352305.477, "dur": 16872.916, + "args": { + "External id": 291009,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367369411.131, "dur": 33.115, + "args": { + "External id": 291010,"Sequence number": 1209201, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6542 + } + }, + { + "ph": "s", "id": 31, "pid": 2070552, "tid": 2070552, "ts": 5333367369411.131, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5333367369430.463, "dur": 8.668, + "args": { + "External id": 291011,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367369433.940, "dur": 4.969, + "args": { + "External id": 291012,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5333367369508.961, "dur": 82.210, + "args": { + "External id": 291013,"Record function id": 0, "Ev Idx": 6545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5333367369593.079, "dur": 1111.926, + "args": { + "External id": 291014,"Record function id": 0, "Ev Idx": 6546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367369667.586, "dur": 1022.596, + "args": { + "External id": 291015,"Sequence number": 1209202, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6547 + } + }, + { + "ph": "s", "id": 30, "pid": 2070552, "tid": 2070552, "ts": 5333367369667.586, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367369737.066, "dur": 44.257, + "args": { + "External id": 291016,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367369794.857, "dur": 106.750, + "args": { + "External id": 291017,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367369911.306, "dur": 37.765, + "args": { + "External id": 291018,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367369957.797, "dur": 31.236, + "args": { + "External id": 291019,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367370013.837, "dur": 24.055, + "args": { + "External id": 291020,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367370062.455, "dur": 14.552, + "args": { + "External id": 291021,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5333367370095.725, "dur": 148.341, + "args": { + "External id": 291022,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367370146.400, "dur": 11.055, + "args": { + "External id": 291023,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367370151.436, "dur": 5.236, + "args": { + "External id": 291024,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367370160.299, "dur": 4.941, + "args": { + "External id": 291025,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367370182.976, "dur": 2.315, + "args": { + "External id": 291026,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367370188.326, "dur": 4.012, + "args": { + "External id": 291027,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367370255.505, "dur": 49.000, + "args": { + "External id": 291028,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5333367370335.465, "dur": 28.607, + "args": { + "External id": 291029,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367370372.428, "dur": 39.916, + "args": { + "External id": 291030,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367370421.198, "dur": 34.552, + "args": { + "External id": 291031,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367370477.134, "dur": 25.219, + "args": { + "External id": 291032,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367370508.419, "dur": 34.346, + "args": { + "External id": 291033,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367370562.184, "dur": 17.603, + "args": { + "External id": 291034,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6566 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.20)", "pid": 2070552, "tid": 2070552, + "ts": 5333367370772.812, "dur": 77.050, + "args": { + "External id": 291035,"Record function id": 0, "Ev Idx": 6567 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5333367370922.419, "dur": 47.416, + "args": { + "External id": 291036,"Record function id": 0, "Ev Idx": 6568 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.21)", "pid": 2070552, "tid": 2070552, + "ts": 5333367370979.133, "dur": 18282.776, + "args": { + "External id": 291037,"Record function id": 0, "Ev Idx": 6569 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.21)", "pid": 2070552, "tid": 2070552, + "ts": 5333367370987.046, "dur": 844.712, + "args": { + "External id": 291038,"Record function id": 0, "Ev Idx": 6570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367371067.200, "dur": 9.255, + "args": { + "External id": 291039,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367371090.238, "dur": 34.667, + "args": { + "External id": 291040,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367371095.774, "dur": 2.245, + "args": { + "External id": 291041,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367371102.476, "dur": 0.268, + "args": { + "External id": 291042,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367371104.073, "dur": 0.293, + "args": { + "External id": 291043,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367371106.137, "dur": 0.277, + "args": { + "External id": 291044,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367371109.156, "dur": 0.408, + "args": { + "External id": 291045,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367371110.936, "dur": 0.230, + "args": { + "External id": 291046,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367371112.742, "dur": 3.231, + "args": { + "External id": 291047,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367371117.590, "dur": 0.322, + "args": { + "External id": 291048,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367371119.202, "dur": 0.174, + "args": { + "External id": 291049,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367371136.097, "dur": 62.607, + "args": { + "External id": 291050,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5333367371235.439, "dur": 118.229, + "args": { + "External id": 291051,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367371247.129, "dur": 5.006, + "args": { + "External id": 291052,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5333367371256.959, "dur": 11.347, + "args": { + "External id": 291053,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367371261.820, "dur": 6.073, + "args": { + "External id": 291054,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367371265.882, "dur": 0.610, + "args": { + "External id": 291055,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367371275.588, "dur": 28.348, + "args": { + "External id": 291056,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367371278.135, "dur": 2.244, + "args": { + "External id": 291057,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367371281.839, "dur": 0.247, + "args": { + "External id": 291058,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367371283.693, "dur": 0.202, + "args": { + "External id": 291059,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367371287.501, "dur": 1.212, + "args": { + "External id": 291060,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367371290.032, "dur": 0.169, + "args": { + "External id": 291061,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367371291.331, "dur": 0.186, + "args": { + "External id": 291062,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367371293.806, "dur": 0.182, + "args": { + "External id": 291063,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367371295.610, "dur": 0.280, + "args": { + "External id": 291064,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367371297.701, "dur": 2.111, + "args": { + "External id": 291065,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367371318.874, "dur": 26.944, + "args": { + "External id": 291066,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5333367371407.356, "dur": 330.019, + "args": { + "External id": 291067,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367371437.478, "dur": 295.292, + "args": { + "External id": 291068,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6600, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5333367371448.264, "dur": 278.338, + "args": { + "External id": 291069,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367371763.099, "dur": 2.453, + "args": { + "External id": 291070,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6602, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.21)", "pid": 2070552, "tid": 2070552, + "ts": 5333367371852.022, "dur": 17196.988, + "args": { + "External id": 291071,"Record function id": 0, "Ev Idx": 6603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367371948.697, "dur": 6.167, + "args": { + "External id": 291072,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367371958.316, "dur": 0.802, + "args": { + "External id": 291073,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367371960.863, "dur": 2.600, + "args": { + "External id": 291074,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367371965.165, "dur": 0.664, + "args": { + "External id": 291075,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367371967.485, "dur": 0.666, + "args": { + "External id": 291076,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367371969.729, "dur": 0.650, + "args": { + "External id": 291077,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367371974.573, "dur": 0.560, + "args": { + "External id": 291078,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367371976.682, "dur": 1.928, + "args": { + "External id": 291079,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367371980.240, "dur": 0.764, + "args": { + "External id": 291080,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367371982.517, "dur": 0.705, + "args": { + "External id": 291081,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367372002.694, "dur": 17001.459, + "args": { + "External id": 291082,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367372017.986, "dur": 16978.321, + "args": { + "External id": 291083,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367372041.058, "dur": 15.839, + "args": { + "External id": 291084,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333367372060.710, "dur": 16902.700, + "args": { + "External id": 291085,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367372063.482, "dur": 16899.229, + "args": { + "External id": 291086,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367372069.116, "dur": 5.460, + "args": { + "External id": 291087,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367372076.215, "dur": 16883.485, + "args": { + "External id": 291088,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367389200.542, "dur": 34.543, + "args": { + "External id": 291089,"Sequence number": 1209203, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6621 + } + }, + { + "ph": "s", "id": 29, "pid": 2070552, "tid": 2070552, "ts": 5333367389200.542, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5333367389220.225, "dur": 9.553, + "args": { + "External id": 291090,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367389224.167, "dur": 5.273, + "args": { + "External id": 291091,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5333367389300.559, "dur": 82.462, + "args": { + "External id": 291092,"Record function id": 0, "Ev Idx": 6624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5333367389384.673, "dur": 1065.240, + "args": { + "External id": 291093,"Record function id": 0, "Ev Idx": 6625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367389428.703, "dur": 1007.482, + "args": { + "External id": 291094,"Sequence number": 1209204, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6626 + } + }, + { + "ph": "s", "id": 28, "pid": 2070552, "tid": 2070552, "ts": 5333367389428.703, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367389498.816, "dur": 44.169, + "args": { + "External id": 291095,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367389555.316, "dur": 132.194, + "args": { + "External id": 291096,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367389704.453, "dur": 42.501, + "args": { + "External id": 291097,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367389753.853, "dur": 30.478, + "args": { + "External id": 291098,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367389814.833, "dur": 26.026, + "args": { + "External id": 291099,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367389856.646, "dur": 14.585, + "args": { + "External id": 291100,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5333367389891.217, "dur": 127.169, + "args": { + "External id": 291101,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367389940.246, "dur": 11.813, + "args": { + "External id": 291102,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367389945.485, "dur": 5.779, + "args": { + "External id": 291103,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367389954.897, "dur": 4.332, + "args": { + "External id": 291104,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367389960.439, "dur": 1.002, + "args": { + "External id": 291105,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367389966.255, "dur": 3.699, + "args": { + "External id": 291106,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367390028.716, "dur": 45.450, + "args": { + "External id": 291107,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5333367390104.269, "dur": 26.222, + "args": { + "External id": 291108,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367390139.378, "dur": 56.967, + "args": { + "External id": 291109,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367390205.611, "dur": 38.216, + "args": { + "External id": 291110,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367390267.225, "dur": 26.078, + "args": { + "External id": 291111,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367390299.125, "dur": 33.640, + "args": { + "External id": 291112,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367390350.003, "dur": 16.861, + "args": { + "External id": 291113,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6645 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.21)", "pid": 2070552, "tid": 2070552, + "ts": 5333367390515.538, "dur": 79.040, + "args": { + "External id": 291114,"Record function id": 0, "Ev Idx": 6646 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5333367390711.113, "dur": 50.816, + "args": { + "External id": 291115,"Record function id": 0, "Ev Idx": 6647 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.22)", "pid": 2070552, "tid": 2070552, + "ts": 5333367390772.057, "dur": 18233.733, + "args": { + "External id": 291116,"Record function id": 0, "Ev Idx": 6648 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.22)", "pid": 2070552, "tid": 2070552, + "ts": 5333367390780.697, "dur": 823.060, + "args": { + "External id": 291117,"Record function id": 0, "Ev Idx": 6649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367390863.522, "dur": 9.560, + "args": { + "External id": 291118,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367390887.458, "dur": 36.617, + "args": { + "External id": 291119,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367390893.598, "dur": 2.443, + "args": { + "External id": 291120,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367390900.764, "dur": 0.235, + "args": { + "External id": 291121,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367390902.493, "dur": 0.417, + "args": { + "External id": 291122,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367390904.604, "dur": 0.278, + "args": { + "External id": 291123,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367390908.070, "dur": 0.349, + "args": { + "External id": 291124,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367390909.976, "dur": 0.403, + "args": { + "External id": 291125,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367390911.946, "dur": 3.078, + "args": { + "External id": 291126,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367390917.064, "dur": 0.251, + "args": { + "External id": 291127,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367390918.635, "dur": 0.183, + "args": { + "External id": 291128,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367390939.233, "dur": 42.912, + "args": { + "External id": 291129,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5333367391013.166, "dur": 108.209, + "args": { + "External id": 291130,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367391023.790, "dur": 5.742, + "args": { + "External id": 291131,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5333367391034.808, "dur": 10.564, + "args": { + "External id": 291132,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367391039.217, "dur": 5.719, + "args": { + "External id": 291133,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367391043.230, "dur": 0.439, + "args": { + "External id": 291134,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367391052.467, "dur": 26.041, + "args": { + "External id": 291135,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367391054.532, "dur": 0.401, + "args": { + "External id": 291136,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367391056.770, "dur": 1.716, + "args": { + "External id": 291137,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367391059.842, "dur": 0.319, + "args": { + "External id": 291138,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367391061.699, "dur": 1.394, + "args": { + "External id": 291139,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367391066.140, "dur": 0.200, + "args": { + "External id": 291140,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367391067.513, "dur": 0.436, + "args": { + "External id": 291141,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367391069.689, "dur": 0.326, + "args": { + "External id": 291142,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367391072.987, "dur": 0.185, + "args": { + "External id": 291143,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367391074.337, "dur": 0.195, + "args": { + "External id": 291144,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367391091.076, "dur": 22.687, + "args": { + "External id": 291145,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5333367391192.831, "dur": 322.139, + "args": { + "External id": 291146,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367391223.627, "dur": 286.906, + "args": { + "External id": 291147,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6679, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5333367391234.714, "dur": 270.757, + "args": { + "External id": 291148,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367391537.108, "dur": 2.407, + "args": { + "External id": 291149,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6681, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.22)", "pid": 2070552, "tid": 2070552, + "ts": 5333367391659.391, "dur": 17149.560, + "args": { + "External id": 291150,"Record function id": 0, "Ev Idx": 6682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367391765.767, "dur": 6.826, + "args": { + "External id": 291151,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367391775.870, "dur": 0.938, + "args": { + "External id": 291152,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367391778.665, "dur": 2.138, + "args": { + "External id": 291153,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367391782.339, "dur": 1.087, + "args": { + "External id": 291154,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367391785.045, "dur": 0.760, + "args": { + "External id": 291155,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367391787.273, "dur": 0.871, + "args": { + "External id": 291156,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367391792.213, "dur": 0.794, + "args": { + "External id": 291157,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367391794.605, "dur": 2.093, + "args": { + "External id": 291158,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367391798.148, "dur": 0.733, + "args": { + "External id": 291159,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367391800.297, "dur": 0.592, + "args": { + "External id": 291160,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367391822.114, "dur": 16942.577, + "args": { + "External id": 291161,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367391837.514, "dur": 16919.641, + "args": { + "External id": 291162,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367391859.602, "dur": 15.380, + "args": { + "External id": 291163,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333367391879.111, "dur": 16844.308, + "args": { + "External id": 291164,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367391881.421, "dur": 16841.123, + "args": { + "External id": 291165,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367391887.443, "dur": 5.635, + "args": { + "External id": 291166,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367391894.576, "dur": 16825.000, + "args": { + "External id": 291167,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367408946.822, "dur": 34.734, + "args": { + "External id": 291168,"Sequence number": 1209205, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6700 + } + }, + { + "ph": "s", "id": 27, "pid": 2070552, "tid": 2070552, "ts": 5333367408946.822, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5333367408967.054, "dur": 9.071, + "args": { + "External id": 291169,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367408970.729, "dur": 5.098, + "args": { + "External id": 291170,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5333367409044.220, "dur": 83.894, + "args": { + "External id": 291171,"Record function id": 0, "Ev Idx": 6703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5333367409129.956, "dur": 1033.680, + "args": { + "External id": 291172,"Record function id": 0, "Ev Idx": 6704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367409182.383, "dur": 967.388, + "args": { + "External id": 291173,"Sequence number": 1209206, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6705 + } + }, + { + "ph": "s", "id": 26, "pid": 2070552, "tid": 2070552, "ts": 5333367409182.383, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367409250.109, "dur": 43.064, + "args": { + "External id": 291174,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367409306.379, "dur": 99.900, + "args": { + "External id": 291175,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367409415.203, "dur": 36.849, + "args": { + "External id": 291176,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367409459.364, "dur": 30.895, + "args": { + "External id": 291177,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367409513.365, "dur": 24.067, + "args": { + "External id": 291178,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367409555.315, "dur": 15.018, + "args": { + "External id": 291179,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5333367409588.384, "dur": 164.372, + "args": { + "External id": 291180,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367409672.865, "dur": 13.329, + "args": { + "External id": 291181,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367409678.749, "dur": 6.554, + "args": { + "External id": 291182,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367409688.832, "dur": 3.835, + "args": { + "External id": 291183,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367409694.261, "dur": 1.248, + "args": { + "External id": 291184,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367409698.140, "dur": 3.062, + "args": { + "External id": 291185,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367409764.273, "dur": 47.880, + "args": { + "External id": 291186,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5333367409841.428, "dur": 27.279, + "args": { + "External id": 291187,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367409876.986, "dur": 40.314, + "args": { + "External id": 291188,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367409925.484, "dur": 33.931, + "args": { + "External id": 291189,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367409981.522, "dur": 25.416, + "args": { + "External id": 291190,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367410012.973, "dur": 33.517, + "args": { + "External id": 291191,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367410064.555, "dur": 16.605, + "args": { + "External id": 291192,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6724 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.22)", "pid": 2070552, "tid": 2070552, + "ts": 5333367410246.518, "dur": 79.759, + "args": { + "External id": 291193,"Record function id": 0, "Ev Idx": 6725 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5333367410400.141, "dur": 48.043, + "args": { + "External id": 291194,"Record function id": 0, "Ev Idx": 6726 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.23)", "pid": 2070552, "tid": 2070552, + "ts": 5333367410458.145, "dur": 18312.226, + "args": { + "External id": 291195,"Record function id": 0, "Ev Idx": 6727 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.23)", "pid": 2070552, "tid": 2070552, + "ts": 5333367410465.670, "dur": 834.320, + "args": { + "External id": 291196,"Record function id": 0, "Ev Idx": 6728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367410547.295, "dur": 10.034, + "args": { + "External id": 291197,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367410571.401, "dur": 32.299, + "args": { + "External id": 291198,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367410576.620, "dur": 2.127, + "args": { + "External id": 291199,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367410583.123, "dur": 0.220, + "args": { + "External id": 291200,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367410584.536, "dur": 0.444, + "args": { + "External id": 291201,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367410585.797, "dur": 0.294, + "args": { + "External id": 291202,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367410588.840, "dur": 0.239, + "args": { + "External id": 291203,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367410590.844, "dur": 0.146, + "args": { + "External id": 291204,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367410592.388, "dur": 1.553, + "args": { + "External id": 291205,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367410594.742, "dur": 1.995, + "args": { + "External id": 291206,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367410597.961, "dur": 0.281, + "args": { + "External id": 291207,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367410614.177, "dur": 84.215, + "args": { + "External id": 291208,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5333367410735.289, "dur": 113.817, + "args": { + "External id": 291209,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367410747.034, "dur": 5.127, + "args": { + "External id": 291210,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5333367410757.373, "dur": 11.107, + "args": { + "External id": 291211,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367410761.938, "dur": 6.083, + "args": { + "External id": 291212,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367410765.702, "dur": 0.754, + "args": { + "External id": 291213,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367410775.252, "dur": 27.214, + "args": { + "External id": 291214,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367410777.115, "dur": 1.804, + "args": { + "External id": 291215,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367410780.275, "dur": 0.632, + "args": { + "External id": 291216,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367410782.324, "dur": 0.158, + "args": { + "External id": 291217,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367410785.245, "dur": 0.360, + "args": { + "External id": 291218,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367410786.558, "dur": 1.812, + "args": { + "External id": 291219,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367410789.386, "dur": 0.503, + "args": { + "External id": 291220,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367410793.251, "dur": 0.539, + "args": { + "External id": 291221,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367410794.670, "dur": 0.299, + "args": { + "External id": 291222,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367410796.570, "dur": 2.017, + "args": { + "External id": 291223,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367410817.904, "dur": 23.463, + "args": { + "External id": 291224,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5333367410904.479, "dur": 303.600, + "args": { + "External id": 291225,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367410934.417, "dur": 268.680, + "args": { + "External id": 291226,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6758, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5333367410944.014, "dur": 252.454, + "args": { + "External id": 291227,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367411231.472, "dur": 2.470, + "args": { + "External id": 291228,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6760, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.23)", "pid": 2070552, "tid": 2070552, + "ts": 5333367411319.829, "dur": 17221.372, + "args": { + "External id": 291229,"Record function id": 0, "Ev Idx": 6761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367411416.491, "dur": 6.007, + "args": { + "External id": 291230,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367411425.937, "dur": 0.777, + "args": { + "External id": 291231,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367411428.575, "dur": 1.231, + "args": { + "External id": 291232,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367411431.279, "dur": 2.118, + "args": { + "External id": 291233,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367411434.531, "dur": 0.834, + "args": { + "External id": 291234,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367411436.495, "dur": 0.776, + "args": { + "External id": 291235,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367411452.605, "dur": 0.797, + "args": { + "External id": 291236,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367411455.113, "dur": 1.811, + "args": { + "External id": 291237,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367411458.357, "dur": 0.678, + "args": { + "External id": 291238,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367411460.263, "dur": 0.642, + "args": { + "External id": 291239,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367411481.925, "dur": 17017.587, + "args": { + "External id": 291240,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367411497.390, "dur": 16994.853, + "args": { + "External id": 291241,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367411512.913, "dur": 13.719, + "args": { + "External id": 291242,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333367411530.556, "dur": 16928.892, + "args": { + "External id": 291243,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367411532.956, "dur": 16925.827, + "args": { + "External id": 291244,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367411538.512, "dur": 7.565, + "args": { + "External id": 291245,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367411547.638, "dur": 16907.908, + "args": { + "External id": 291246,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367428709.364, "dur": 33.698, + "args": { + "External id": 291247,"Sequence number": 1209207, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6779 + } + }, + { + "ph": "s", "id": 25, "pid": 2070552, "tid": 2070552, "ts": 5333367428709.364, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5333367428729.124, "dur": 9.100, + "args": { + "External id": 291248,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367428732.684, "dur": 5.144, + "args": { + "External id": 291249,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5333367428806.309, "dur": 81.049, + "args": { + "External id": 291250,"Record function id": 0, "Ev Idx": 6782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5333367428888.989, "dur": 1063.406, + "args": { + "External id": 291251,"Record function id": 0, "Ev Idx": 6783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367428931.174, "dur": 1007.931, + "args": { + "External id": 291252,"Sequence number": 1209208, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6784 + } + }, + { + "ph": "s", "id": 24, "pid": 2070552, "tid": 2070552, "ts": 5333367428931.174, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367428999.371, "dur": 45.695, + "args": { + "External id": 291253,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367429057.109, "dur": 103.693, + "args": { + "External id": 291254,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367429188.329, "dur": 42.467, + "args": { + "External id": 291255,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367429240.423, "dur": 30.721, + "args": { + "External id": 291256,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367429302.149, "dur": 27.692, + "args": { + "External id": 291257,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367429344.336, "dur": 13.194, + "args": { + "External id": 291258,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5333367429375.364, "dur": 122.818, + "args": { + "External id": 291259,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367429423.049, "dur": 11.735, + "args": { + "External id": 291260,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367429428.452, "dur": 5.420, + "args": { + "External id": 291261,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367429437.529, "dur": 3.645, + "args": { + "External id": 291262,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367429442.421, "dur": 0.992, + "args": { + "External id": 291263,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367429445.940, "dur": 4.513, + "args": { + "External id": 291264,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367429508.214, "dur": 43.134, + "args": { + "External id": 291265,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5333367429579.945, "dur": 26.731, + "args": { + "External id": 291266,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367429614.216, "dur": 81.613, + "args": { + "External id": 291267,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367429709.177, "dur": 36.277, + "args": { + "External id": 291268,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367429771.653, "dur": 25.715, + "args": { + "External id": 291269,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367429802.756, "dur": 34.620, + "args": { + "External id": 291270,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367429853.840, "dur": 19.359, + "args": { + "External id": 291271,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6803 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.23)", "pid": 2070552, "tid": 2070552, + "ts": 5333367430014.546, "dur": 78.024, + "args": { + "External id": 291272,"Record function id": 0, "Ev Idx": 6804 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5333367430163.611, "dur": 65.377, + "args": { + "External id": 291273,"Record function id": 0, "Ev Idx": 6805 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.24)", "pid": 2070552, "tid": 2070552, + "ts": 5333367430239.117, "dur": 18331.231, + "args": { + "External id": 291274,"Record function id": 0, "Ev Idx": 6806 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.24)", "pid": 2070552, "tid": 2070552, + "ts": 5333367430246.768, "dur": 792.124, + "args": { + "External id": 291275,"Record function id": 0, "Ev Idx": 6807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367430329.392, "dur": 8.674, + "args": { + "External id": 291276,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367430352.287, "dur": 33.256, + "args": { + "External id": 291277,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367430357.927, "dur": 2.157, + "args": { + "External id": 291278,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367430364.327, "dur": 0.251, + "args": { + "External id": 291279,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367430365.825, "dur": 0.314, + "args": { + "External id": 291280,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367430367.576, "dur": 0.165, + "args": { + "External id": 291281,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367430370.915, "dur": 0.181, + "args": { + "External id": 291282,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367430372.177, "dur": 0.255, + "args": { + "External id": 291283,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367430373.354, "dur": 2.790, + "args": { + "External id": 291284,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367430377.539, "dur": 0.228, + "args": { + "External id": 291285,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367430379.101, "dur": 0.344, + "args": { + "External id": 291286,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367430396.610, "dur": 39.878, + "args": { + "External id": 291287,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5333367430468.549, "dur": 106.877, + "args": { + "External id": 291288,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367430479.369, "dur": 3.560, + "args": { + "External id": 291289,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5333367430487.970, "dur": 10.124, + "args": { + "External id": 291290,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367430492.384, "dur": 5.258, + "args": { + "External id": 291291,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367430495.846, "dur": 0.582, + "args": { + "External id": 291292,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367430506.653, "dur": 25.449, + "args": { + "External id": 291293,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367430508.270, "dur": 1.814, + "args": { + "External id": 291294,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367430511.822, "dur": 0.253, + "args": { + "External id": 291295,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367430513.317, "dur": 0.338, + "args": { + "External id": 291296,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367430516.368, "dur": 1.317, + "args": { + "External id": 291297,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367430518.769, "dur": 0.158, + "args": { + "External id": 291298,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367430520.509, "dur": 0.144, + "args": { + "External id": 291299,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367430523.576, "dur": 0.161, + "args": { + "External id": 291300,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367430525.309, "dur": 0.186, + "args": { + "External id": 291301,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367430526.500, "dur": 1.572, + "args": { + "External id": 291302,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367430544.424, "dur": 23.135, + "args": { + "External id": 291303,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5333367430665.217, "dur": 286.357, + "args": { + "External id": 291304,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367430695.600, "dur": 251.565, + "args": { + "External id": 291305,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6837, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5333367430706.348, "dur": 235.134, + "args": { + "External id": 291306,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367430972.388, "dur": 2.128, + "args": { + "External id": 291307,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6839, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.24)", "pid": 2070552, "tid": 2070552, + "ts": 5333367431059.014, "dur": 17310.137, + "args": { + "External id": 291308,"Record function id": 0, "Ev Idx": 6840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367431153.901, "dur": 6.038, + "args": { + "External id": 291309,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367431163.027, "dur": 1.026, + "args": { + "External id": 291310,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367431165.713, "dur": 19.159, + "args": { + "External id": 291311,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367431189.279, "dur": 0.873, + "args": { + "External id": 291312,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367431191.728, "dur": 0.832, + "args": { + "External id": 291313,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367431193.738, "dur": 0.760, + "args": { + "External id": 291314,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367431198.105, "dur": 0.808, + "args": { + "External id": 291315,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367431200.549, "dur": 1.921, + "args": { + "External id": 291316,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367431203.718, "dur": 0.480, + "args": { + "External id": 291317,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367431205.395, "dur": 0.779, + "args": { + "External id": 291318,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367431227.396, "dur": 17096.133, + "args": { + "External id": 291319,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367431243.108, "dur": 17072.717, + "args": { + "External id": 291320,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367431264.872, "dur": 12.807, + "args": { + "External id": 291321,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333367431281.532, "dur": 17000.728, + "args": { + "External id": 291322,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367431284.393, "dur": 16997.156, + "args": { + "External id": 291323,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367431290.482, "dur": 6.164, + "args": { + "External id": 291324,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367431298.646, "dur": 16979.647, + "args": { + "External id": 291325,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367448511.875, "dur": 32.332, + "args": { + "External id": 291326,"Sequence number": 1209209, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6858 + } + }, + { + "ph": "s", "id": 23, "pid": 2070552, "tid": 2070552, "ts": 5333367448511.875, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5333367448531.101, "dur": 8.323, + "args": { + "External id": 291327,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367448534.822, "dur": 4.374, + "args": { + "External id": 291328,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5333367448657.553, "dur": 83.079, + "args": { + "External id": 291329,"Record function id": 0, "Ev Idx": 6861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5333367448744.088, "dur": 1075.015, + "args": { + "External id": 291330,"Record function id": 0, "Ev Idx": 6862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367448787.129, "dur": 1018.411, + "args": { + "External id": 291331,"Sequence number": 1209210, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6863 + } + }, + { + "ph": "s", "id": 22, "pid": 2070552, "tid": 2070552, "ts": 5333367448787.129, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367448858.114, "dur": 45.178, + "args": { + "External id": 291332,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367448916.036, "dur": 106.203, + "args": { + "External id": 291333,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367449033.304, "dur": 37.005, + "args": { + "External id": 291334,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367449076.885, "dur": 30.764, + "args": { + "External id": 291335,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367449133.813, "dur": 25.220, + "args": { + "External id": 291336,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367449191.228, "dur": 17.873, + "args": { + "External id": 291337,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5333367449227.926, "dur": 129.521, + "args": { + "External id": 291338,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367449278.665, "dur": 12.634, + "args": { + "External id": 291339,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367449284.128, "dur": 6.326, + "args": { + "External id": 291340,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367449293.743, "dur": 4.617, + "args": { + "External id": 291341,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367449299.686, "dur": 1.296, + "args": { + "External id": 291342,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367449305.076, "dur": 3.104, + "args": { + "External id": 291343,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367449368.934, "dur": 47.827, + "args": { + "External id": 291344,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5333367449447.275, "dur": 27.327, + "args": { + "External id": 291345,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367449483.357, "dur": 41.194, + "args": { + "External id": 291346,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367449531.867, "dur": 34.319, + "args": { + "External id": 291347,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367449588.085, "dur": 26.214, + "args": { + "External id": 291348,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367449656.906, "dur": 39.743, + "args": { + "External id": 291349,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367449718.130, "dur": 18.984, + "args": { + "External id": 291350,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6882 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.24)", "pid": 2070552, "tid": 2070552, + "ts": 5333367449882.744, "dur": 72.615, + "args": { + "External id": 291351,"Record function id": 0, "Ev Idx": 6883 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5333367450026.143, "dur": 44.580, + "args": { + "External id": 291352,"Record function id": 0, "Ev Idx": 6884 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.25)", "pid": 2070552, "tid": 2070552, + "ts": 5333367450080.155, "dur": 18413.827, + "args": { + "External id": 291353,"Record function id": 0, "Ev Idx": 6885 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.25)", "pid": 2070552, "tid": 2070552, + "ts": 5333367450088.475, "dur": 811.922, + "args": { + "External id": 291354,"Record function id": 0, "Ev Idx": 6886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367450184.764, "dur": 9.799, + "args": { + "External id": 291355,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367450209.850, "dur": 31.678, + "args": { + "External id": 291356,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367450215.131, "dur": 2.201, + "args": { + "External id": 291357,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367450221.171, "dur": 0.462, + "args": { + "External id": 291358,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367450222.799, "dur": 0.575, + "args": { + "External id": 291359,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367450224.666, "dur": 0.401, + "args": { + "External id": 291360,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367450227.836, "dur": 0.715, + "args": { + "External id": 291361,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367450229.288, "dur": 0.344, + "args": { + "External id": 291362,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367450230.903, "dur": 2.013, + "args": { + "External id": 291363,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367450233.765, "dur": 0.428, + "args": { + "External id": 291364,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367450235.413, "dur": 0.392, + "args": { + "External id": 291365,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367450252.112, "dur": 40.027, + "args": { + "External id": 291366,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5333367450324.974, "dur": 108.188, + "args": { + "External id": 291367,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367450335.638, "dur": 3.796, + "args": { + "External id": 291368,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5333367450344.276, "dur": 10.564, + "args": { + "External id": 291369,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367450349.027, "dur": 5.317, + "args": { + "External id": 291370,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367450352.430, "dur": 0.737, + "args": { + "External id": 291371,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367450361.258, "dur": 25.121, + "args": { + "External id": 291372,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367450363.329, "dur": 2.638, + "args": { + "External id": 291373,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367450367.390, "dur": 0.611, + "args": { + "External id": 291374,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367450368.748, "dur": 0.338, + "args": { + "External id": 291375,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367450371.888, "dur": 0.433, + "args": { + "External id": 291376,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367450373.265, "dur": 0.504, + "args": { + "External id": 291377,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367450374.407, "dur": 0.576, + "args": { + "External id": 291378,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367450377.511, "dur": 0.423, + "args": { + "External id": 291379,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367450379.040, "dur": 0.664, + "args": { + "External id": 291380,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367450380.581, "dur": 2.299, + "args": { + "External id": 291381,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367450402.914, "dur": 22.309, + "args": { + "External id": 291382,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5333367450487.729, "dur": 319.984, + "args": { + "External id": 291383,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367450516.719, "dur": 286.024, + "args": { + "External id": 291384,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6916, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5333367450526.386, "dur": 270.436, + "args": { + "External id": 291385,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367450832.431, "dur": 2.388, + "args": { + "External id": 291386,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6918, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.25)", "pid": 2070552, "tid": 2070552, + "ts": 5333367450921.880, "dur": 17374.736, + "args": { + "External id": 291387,"Record function id": 0, "Ev Idx": 6919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367451020.074, "dur": 6.196, + "args": { + "External id": 291388,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367451029.868, "dur": 1.228, + "args": { + "External id": 291389,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367451032.812, "dur": 1.034, + "args": { + "External id": 291390,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367451035.524, "dur": 1.133, + "args": { + "External id": 291391,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367451038.112, "dur": 0.996, + "args": { + "External id": 291392,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367451040.368, "dur": 0.962, + "args": { + "External id": 291393,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 6925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367451044.857, "dur": 1.127, + "args": { + "External id": 291394,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 6926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367451047.420, "dur": 2.443, + "args": { + "External id": 291395,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367451051.313, "dur": 1.059, + "args": { + "External id": 291396,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367451053.792, "dur": 0.755, + "args": { + "External id": 291397,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 6929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367451074.365, "dur": 17178.394, + "args": { + "External id": 291398,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367451089.344, "dur": 17155.917, + "args": { + "External id": 291399,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 6931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367451113.083, "dur": 12.815, + "args": { + "External id": 291400,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333367451129.662, "dur": 17082.080, + "args": { + "External id": 291401,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 6933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367451132.066, "dur": 17078.926, + "args": { + "External id": 291402,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 6934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367451137.720, "dur": 5.513, + "args": { + "External id": 291403,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367451144.823, "dur": 17062.846, + "args": { + "External id": 291404,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 6936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367468436.596, "dur": 32.562, + "args": { + "External id": 291405,"Sequence number": 1209211, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 6937 + } + }, + { + "ph": "s", "id": 21, "pid": 2070552, "tid": 2070552, "ts": 5333367468436.596, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5333367468454.909, "dur": 9.082, + "args": { + "External id": 291406,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 6938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367468458.912, "dur": 4.904, + "args": { + "External id": 291407,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 6939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5333367468532.760, "dur": 82.902, + "args": { + "External id": 291408,"Record function id": 0, "Ev Idx": 6940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5333367468617.155, "dur": 1101.294, + "args": { + "External id": 291409,"Record function id": 0, "Ev Idx": 6941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367468691.347, "dur": 1012.927, + "args": { + "External id": 291410,"Sequence number": 1209212, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 6942 + } + }, + { + "ph": "s", "id": 20, "pid": 2070552, "tid": 2070552, "ts": 5333367468691.347, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367468761.920, "dur": 44.470, + "args": { + "External id": 291411,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367468820.478, "dur": 106.694, + "args": { + "External id": 291412,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367468936.145, "dur": 38.194, + "args": { + "External id": 291413,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367468981.851, "dur": 30.262, + "args": { + "External id": 291414,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367469037.694, "dur": 26.093, + "args": { + "External id": 291415,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367469081.410, "dur": 13.521, + "args": { + "External id": 291416,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 6948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5333367469118.288, "dur": 148.238, + "args": { + "External id": 291417,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 6949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367469187.169, "dur": 12.784, + "args": { + "External id": 291418,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 6950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367469192.264, "dur": 6.686, + "args": { + "External id": 291419,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367469202.691, "dur": 4.164, + "args": { + "External id": 291420,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367469207.964, "dur": 1.286, + "args": { + "External id": 291421,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367469211.755, "dur": 2.379, + "args": { + "External id": 291422,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367469278.302, "dur": 49.379, + "args": { + "External id": 291423,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 6955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5333367469357.387, "dur": 28.380, + "args": { + "External id": 291424,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 6956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367469393.722, "dur": 40.307, + "args": { + "External id": 291425,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367469441.559, "dur": 34.335, + "args": { + "External id": 291426,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 6958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367469498.268, "dur": 23.812, + "args": { + "External id": 291427,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 6959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367469528.453, "dur": 33.794, + "args": { + "External id": 291428,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 6960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367469583.960, "dur": 18.117, + "args": { + "External id": 291429,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 6961 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.25)", "pid": 2070552, "tid": 2070552, + "ts": 5333367469781.954, "dur": 76.861, + "args": { + "External id": 291430,"Record function id": 0, "Ev Idx": 6962 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5333367469930.851, "dur": 46.438, + "args": { + "External id": 291431,"Record function id": 0, "Ev Idx": 6963 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.26)", "pid": 2070552, "tid": 2070552, + "ts": 5333367469985.968, "dur": 18632.811, + "args": { + "External id": 291432,"Record function id": 0, "Ev Idx": 6964 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.26)", "pid": 2070552, "tid": 2070552, + "ts": 5333367469994.388, "dur": 835.315, + "args": { + "External id": 291433,"Record function id": 0, "Ev Idx": 6965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367470075.751, "dur": 8.911, + "args": { + "External id": 291434,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367470098.910, "dur": 31.366, + "args": { + "External id": 291435,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367470103.960, "dur": 2.355, + "args": { + "External id": 291436,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367470110.202, "dur": 0.664, + "args": { + "External id": 291437,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367470111.729, "dur": 0.411, + "args": { + "External id": 291438,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367470112.877, "dur": 0.736, + "args": { + "External id": 291439,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367470116.386, "dur": 0.535, + "args": { + "External id": 291440,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367470117.757, "dur": 0.500, + "args": { + "External id": 291441,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367470119.210, "dur": 2.390, + "args": { + "External id": 291442,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367470122.733, "dur": 0.377, + "args": { + "External id": 291443,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367470123.956, "dur": 0.427, + "args": { + "External id": 291444,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367470141.215, "dur": 61.049, + "args": { + "External id": 291445,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5333367470238.881, "dur": 122.876, + "args": { + "External id": 291446,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 6978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367470250.270, "dur": 10.156, + "args": { + "External id": 291447,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 6979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5333367470266.008, "dur": 11.060, + "args": { + "External id": 291448,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367470270.503, "dur": 6.100, + "args": { + "External id": 291449,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 6981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367470274.106, "dur": 0.870, + "args": { + "External id": 291450,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 6982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367470288.719, "dur": 25.918, + "args": { + "External id": 291451,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 6983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367470290.673, "dur": 2.430, + "args": { + "External id": 291452,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367470294.343, "dur": 0.690, + "args": { + "External id": 291453,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367470296.057, "dur": 0.723, + "args": { + "External id": 291454,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367470299.337, "dur": 0.457, + "args": { + "External id": 291455,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367470301.058, "dur": 0.682, + "args": { + "External id": 291456,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367470302.726, "dur": 0.566, + "args": { + "External id": 291457,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367470305.798, "dur": 0.615, + "args": { + "External id": 291458,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367470307.374, "dur": 0.384, + "args": { + "External id": 291459,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367470308.791, "dur": 1.938, + "args": { + "External id": 291460,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 6992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367470328.815, "dur": 24.400, + "args": { + "External id": 291461,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 6993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5333367470415.588, "dur": 322.989, + "args": { + "External id": 291462,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 6994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367470444.094, "dur": 289.335, + "args": { + "External id": 291463,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 6995, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5333367470453.546, "dur": 274.214, + "args": { + "External id": 291464,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 6996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367470762.183, "dur": 2.209, + "args": { + "External id": 291465,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 6997, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.26)", "pid": 2070552, "tid": 2070552, + "ts": 5333367470851.013, "dur": 17573.614, + "args": { + "External id": 291466,"Record function id": 0, "Ev Idx": 6998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367470946.946, "dur": 6.046, + "args": { + "External id": 291467,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 6999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367470956.334, "dur": 1.101, + "args": { + "External id": 291468,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367470959.036, "dur": 1.554, + "args": { + "External id": 291469,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367470962.144, "dur": 0.978, + "args": { + "External id": 291470,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367470964.435, "dur": 0.883, + "args": { + "External id": 291471,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367470966.699, "dur": 0.861, + "args": { + "External id": 291472,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367470971.069, "dur": 1.264, + "args": { + "External id": 291473,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367470973.601, "dur": 1.896, + "args": { + "External id": 291474,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367470976.808, "dur": 0.872, + "args": { + "External id": 291475,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367470978.893, "dur": 1.044, + "args": { + "External id": 291476,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367470999.790, "dur": 17380.431, + "args": { + "External id": 291477,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367471014.515, "dur": 17357.653, + "args": { + "External id": 291478,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367471034.586, "dur": 14.346, + "args": { + "External id": 291479,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333367471052.864, "dur": 17285.460, + "args": { + "External id": 291480,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367471055.152, "dur": 17282.437, + "args": { + "External id": 291481,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367471060.311, "dur": 5.703, + "args": { + "External id": 291482,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367471067.699, "dur": 17266.570, + "args": { + "External id": 291483,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367488561.595, "dur": 32.882, + "args": { + "External id": 291484,"Sequence number": 1209213, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7016 + } + }, + { + "ph": "s", "id": 19, "pid": 2070552, "tid": 2070552, "ts": 5333367488561.595, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5333367488580.195, "dur": 9.457, + "args": { + "External id": 291485,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367488584.008, "dur": 5.387, + "args": { + "External id": 291486,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5333367488686.202, "dur": 84.205, + "args": { + "External id": 291487,"Record function id": 0, "Ev Idx": 7019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5333367488771.821, "dur": 1075.331, + "args": { + "External id": 291488,"Record function id": 0, "Ev Idx": 7020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367488812.473, "dur": 1021.504, + "args": { + "External id": 291489,"Sequence number": 1209214, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7021 + } + }, + { + "ph": "s", "id": 18, "pid": 2070552, "tid": 2070552, "ts": 5333367488812.473, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367488880.519, "dur": 46.145, + "args": { + "External id": 291490,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367488940.716, "dur": 103.768, + "args": { + "External id": 291491,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367489054.328, "dur": 39.426, + "args": { + "External id": 291492,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367489101.858, "dur": 30.240, + "args": { + "External id": 291493,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367489157.728, "dur": 43.799, + "args": { + "External id": 291494,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367489222.248, "dur": 14.814, + "args": { + "External id": 291495,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5333367489255.792, "dur": 125.973, + "args": { + "External id": 291496,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367489305.105, "dur": 12.149, + "args": { + "External id": 291497,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367489310.119, "dur": 6.273, + "args": { + "External id": 291498,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367489319.593, "dur": 4.462, + "args": { + "External id": 291499,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367489325.138, "dur": 1.068, + "args": { + "External id": 291500,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367489328.539, "dur": 3.606, + "args": { + "External id": 291501,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367489393.085, "dur": 48.984, + "args": { + "External id": 291502,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5333367489474.033, "dur": 29.028, + "args": { + "External id": 291503,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367489511.537, "dur": 41.160, + "args": { + "External id": 291504,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367489561.624, "dur": 34.784, + "args": { + "External id": 291505,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367489654.435, "dur": 28.044, + "args": { + "External id": 291506,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367489690.621, "dur": 38.668, + "args": { + "External id": 291507,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367489750.251, "dur": 18.208, + "args": { + "External id": 291508,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.26)", "pid": 2070552, "tid": 2070552, + "ts": 5333367489911.114, "dur": 74.624, + "args": { + "External id": 291509,"Record function id": 0, "Ev Idx": 7041 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5333367490056.620, "dur": 46.238, + "args": { + "External id": 291510,"Record function id": 0, "Ev Idx": 7042 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.27)", "pid": 2070552, "tid": 2070552, + "ts": 5333367490111.781, "dur": 18419.048, + "args": { + "External id": 291511,"Record function id": 0, "Ev Idx": 7043 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.27)", "pid": 2070552, "tid": 2070552, + "ts": 5333367490119.884, "dur": 868.332, + "args": { + "External id": 291512,"Record function id": 0, "Ev Idx": 7044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367490220.341, "dur": 9.964, + "args": { + "External id": 291513,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367490243.808, "dur": 35.269, + "args": { + "External id": 291514,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367490249.165, "dur": 2.365, + "args": { + "External id": 291515,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367490255.864, "dur": 1.409, + "args": { + "External id": 291516,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367490258.227, "dur": 0.544, + "args": { + "External id": 291517,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367490259.704, "dur": 0.602, + "args": { + "External id": 291518,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367490263.000, "dur": 0.917, + "args": { + "External id": 291519,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367490265.124, "dur": 0.607, + "args": { + "External id": 291520,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367490266.620, "dur": 2.565, + "args": { + "External id": 291521,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367490270.396, "dur": 0.619, + "args": { + "External id": 291522,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367490272.184, "dur": 0.473, + "args": { + "External id": 291523,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367490289.787, "dur": 39.681, + "args": { + "External id": 291524,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5333367490364.436, "dur": 107.315, + "args": { + "External id": 291525,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367490375.352, "dur": 4.480, + "args": { + "External id": 291526,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5333367490384.993, "dur": 10.144, + "args": { + "External id": 291527,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367490389.386, "dur": 5.306, + "args": { + "External id": 291528,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367490392.659, "dur": 0.779, + "args": { + "External id": 291529,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367490402.374, "dur": 24.633, + "args": { + "External id": 291530,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367490404.272, "dur": 2.180, + "args": { + "External id": 291531,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367490407.434, "dur": 0.654, + "args": { + "External id": 291532,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367490408.979, "dur": 0.693, + "args": { + "External id": 291533,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367490412.369, "dur": 0.392, + "args": { + "External id": 291534,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367490413.503, "dur": 0.558, + "args": { + "External id": 291535,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367490415.039, "dur": 0.611, + "args": { + "External id": 291536,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367490418.414, "dur": 0.770, + "args": { + "External id": 291537,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367490419.783, "dur": 0.583, + "args": { + "External id": 291538,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367490421.127, "dur": 2.090, + "args": { + "External id": 291539,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367490441.165, "dur": 22.145, + "args": { + "External id": 291540,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5333367490526.928, "dur": 366.482, + "args": { + "External id": 291541,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367490556.179, "dur": 332.164, + "args": { + "External id": 291542,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7074, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5333367490565.834, "dur": 315.896, + "args": { + "External id": 291543,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367490918.263, "dur": 2.709, + "args": { + "External id": 291544,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7076, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.27)", "pid": 2070552, "tid": 2070552, + "ts": 5333367491008.689, "dur": 17321.259, + "args": { + "External id": 291545,"Record function id": 0, "Ev Idx": 7077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367491105.885, "dur": 6.619, + "args": { + "External id": 291546,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367491115.943, "dur": 1.051, + "args": { + "External id": 291547,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367491118.666, "dur": 1.062, + "args": { + "External id": 291548,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367491121.214, "dur": 1.174, + "args": { + "External id": 291549,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367491123.737, "dur": 1.181, + "args": { + "External id": 291550,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367491126.489, "dur": 1.248, + "args": { + "External id": 291551,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367491131.521, "dur": 0.818, + "args": { + "External id": 291552,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367491133.933, "dur": 1.949, + "args": { + "External id": 291553,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367491137.035, "dur": 1.199, + "args": { + "External id": 291554,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367491139.986, "dur": 0.748, + "args": { + "External id": 291555,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367491160.786, "dur": 17123.095, + "args": { + "External id": 291556,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367491194.561, "dur": 17081.770, + "args": { + "External id": 291557,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367491218.048, "dur": 13.945, + "args": { + "External id": 291558,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333367491236.060, "dur": 17007.003, + "args": { + "External id": 291559,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367491238.538, "dur": 17003.704, + "args": { + "External id": 291560,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367491244.553, "dur": 6.107, + "args": { + "External id": 291561,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367491252.589, "dur": 16986.569, + "args": { + "External id": 291562,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367508475.257, "dur": 31.227, + "args": { + "External id": 291563,"Sequence number": 1209215, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7095 + } + }, + { + "ph": "s", "id": 17, "pid": 2070552, "tid": 2070552, "ts": 5333367508475.257, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5333367508493.076, "dur": 8.859, + "args": { + "External id": 291564,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367508496.531, "dur": 5.138, + "args": { + "External id": 291565,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5333367508568.164, "dur": 125.669, + "args": { + "External id": 291566,"Record function id": 0, "Ev Idx": 7098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5333367508697.873, "dur": 1082.040, + "args": { + "External id": 291567,"Record function id": 0, "Ev Idx": 7099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367508741.542, "dur": 1025.015, + "args": { + "External id": 291568,"Sequence number": 1209216, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7100 + } + }, + { + "ph": "s", "id": 16, "pid": 2070552, "tid": 2070552, "ts": 5333367508741.542, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367508813.852, "dur": 45.787, + "args": { + "External id": 291569,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367508873.832, "dur": 105.381, + "args": { + "External id": 291570,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367508988.220, "dur": 38.515, + "args": { + "External id": 291571,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367509036.170, "dur": 30.182, + "args": { + "External id": 291572,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367509096.506, "dur": 27.378, + "args": { + "External id": 291573,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367509140.935, "dur": 13.208, + "args": { + "External id": 291574,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5333367509188.695, "dur": 133.285, + "args": { + "External id": 291575,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367509239.623, "dur": 12.551, + "args": { + "External id": 291576,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367509244.736, "dur": 6.490, + "args": { + "External id": 291577,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367509257.110, "dur": 3.993, + "args": { + "External id": 291578,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367509262.426, "dur": 1.237, + "args": { + "External id": 291579,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367509267.820, "dur": 3.225, + "args": { + "External id": 291580,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367509334.164, "dur": 49.480, + "args": { + "External id": 291581,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5333367509415.802, "dur": 28.057, + "args": { + "External id": 291582,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367509451.927, "dur": 40.678, + "args": { + "External id": 291583,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367509499.260, "dur": 34.709, + "args": { + "External id": 291584,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367509555.173, "dur": 26.276, + "args": { + "External id": 291585,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367509586.846, "dur": 67.618, + "args": { + "External id": 291586,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367509679.043, "dur": 20.164, + "args": { + "External id": 291587,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7119 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.27)", "pid": 2070552, "tid": 2070552, + "ts": 5333367509841.700, "dur": 75.891, + "args": { + "External id": 291588,"Record function id": 0, "Ev Idx": 7120 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5333367509987.862, "dur": 42.847, + "args": { + "External id": 291589,"Record function id": 0, "Ev Idx": 7121 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.28)", "pid": 2070552, "tid": 2070552, + "ts": 5333367510039.242, "dur": 18392.624, + "args": { + "External id": 291590,"Record function id": 0, "Ev Idx": 7122 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.28)", "pid": 2070552, "tid": 2070552, + "ts": 5333367510046.673, "dur": 815.869, + "args": { + "External id": 291591,"Record function id": 0, "Ev Idx": 7123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367510128.687, "dur": 7.785, + "args": { + "External id": 291592,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367510149.931, "dur": 51.784, + "args": { + "External id": 291593,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367510155.182, "dur": 2.380, + "args": { + "External id": 291594,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367510161.546, "dur": 0.262, + "args": { + "External id": 291595,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367510163.408, "dur": 0.204, + "args": { + "External id": 291596,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367510164.553, "dur": 0.841, + "args": { + "External id": 291597,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367510183.499, "dur": 0.821, + "args": { + "External id": 291598,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367510186.288, "dur": 0.671, + "args": { + "External id": 291599,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367510188.317, "dur": 2.679, + "args": { + "External id": 291600,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367510192.451, "dur": 0.757, + "args": { + "External id": 291601,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367510194.806, "dur": 0.730, + "args": { + "External id": 291602,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367510213.672, "dur": 40.326, + "args": { + "External id": 291603,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5333367510287.348, "dur": 107.412, + "args": { + "External id": 291604,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367510298.466, "dur": 4.538, + "args": { + "External id": 291605,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5333367510308.115, "dur": 10.040, + "args": { + "External id": 291606,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367510312.368, "dur": 5.337, + "args": { + "External id": 291607,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367510315.790, "dur": 0.707, + "args": { + "External id": 291608,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367510324.667, "dur": 28.636, + "args": { + "External id": 291609,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367510326.615, "dur": 2.135, + "args": { + "External id": 291610,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367510330.037, "dur": 0.582, + "args": { + "External id": 291611,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367510331.846, "dur": 0.725, + "args": { + "External id": 291612,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367510335.703, "dur": 0.952, + "args": { + "External id": 291613,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367510337.863, "dur": 1.033, + "args": { + "External id": 291614,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367510339.793, "dur": 0.984, + "args": { + "External id": 291615,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367510343.791, "dur": 0.736, + "args": { + "External id": 291616,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367510345.399, "dur": 0.727, + "args": { + "External id": 291617,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367510347.401, "dur": 1.933, + "args": { + "External id": 291618,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367510365.777, "dur": 20.517, + "args": { + "External id": 291619,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5333367510446.446, "dur": 320.758, + "args": { + "External id": 291620,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367510475.025, "dur": 287.275, + "args": { + "External id": 291621,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7153, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5333367510485.076, "dur": 269.985, + "args": { + "External id": 291622,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367510791.447, "dur": 2.658, + "args": { + "External id": 291623,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7155, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.28)", "pid": 2070552, "tid": 2070552, + "ts": 5333367510883.305, "dur": 17348.363, + "args": { + "External id": 291624,"Record function id": 0, "Ev Idx": 7156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367510981.101, "dur": 6.099, + "args": { + "External id": 291625,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367510990.717, "dur": 1.521, + "args": { + "External id": 291626,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367510994.073, "dur": 1.394, + "args": { + "External id": 291627,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367510997.351, "dur": 0.946, + "args": { + "External id": 291628,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367510999.941, "dur": 0.994, + "args": { + "External id": 291629,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367511002.117, "dur": 1.576, + "args": { + "External id": 291630,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367511007.431, "dur": 1.384, + "args": { + "External id": 291631,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367511010.065, "dur": 1.831, + "args": { + "External id": 291632,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367511013.518, "dur": 0.651, + "args": { + "External id": 291633,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367511015.389, "dur": 1.008, + "args": { + "External id": 291634,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367511035.664, "dur": 17151.735, + "args": { + "External id": 291635,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367511050.606, "dur": 17128.769, + "args": { + "External id": 291636,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367511068.488, "dur": 12.830, + "args": { + "External id": 291637,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333367511085.308, "dur": 17051.669, + "args": { + "External id": 291638,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367511087.884, "dur": 17048.404, + "args": { + "External id": 291639,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367511093.827, "dur": 6.033, + "args": { + "External id": 291640,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367511101.372, "dur": 17031.810, + "args": { + "External id": 291641,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367528374.624, "dur": 33.097, + "args": { + "External id": 291642,"Sequence number": 1209217, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7174 + } + }, + { + "ph": "s", "id": 15, "pid": 2070552, "tid": 2070552, "ts": 5333367528374.624, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5333367528394.163, "dur": 8.864, + "args": { + "External id": 291643,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367528397.735, "dur": 5.076, + "args": { + "External id": 291644,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5333367528469.981, "dur": 82.216, + "args": { + "External id": 291645,"Record function id": 0, "Ev Idx": 7177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5333367528553.996, "dur": 1050.666, + "args": { + "External id": 291646,"Record function id": 0, "Ev Idx": 7178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367528594.078, "dur": 998.151, + "args": { + "External id": 291647,"Sequence number": 1209218, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7179 + } + }, + { + "ph": "s", "id": 14, "pid": 2070552, "tid": 2070552, "ts": 5333367528594.078, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367528693.752, "dur": 50.341, + "args": { + "External id": 291648,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367528758.350, "dur": 106.509, + "args": { + "External id": 291649,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367528873.348, "dur": 37.460, + "args": { + "External id": 291650,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367528919.269, "dur": 30.605, + "args": { + "External id": 291651,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367528975.230, "dur": 23.925, + "args": { + "External id": 291652,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367529016.532, "dur": 14.602, + "args": { + "External id": 291653,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5333367529049.129, "dur": 141.924, + "args": { + "External id": 291654,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367529100.266, "dur": 10.982, + "args": { + "External id": 291655,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367529104.959, "dur": 5.547, + "args": { + "External id": 291656,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367529113.694, "dur": 4.436, + "args": { + "External id": 291657,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367529119.399, "dur": 1.275, + "args": { + "External id": 291658,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367529123.300, "dur": 3.362, + "args": { + "External id": 291659,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367529202.695, "dur": 49.262, + "args": { + "External id": 291660,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5333367529284.390, "dur": 27.202, + "args": { + "External id": 291661,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367529319.826, "dur": 40.985, + "args": { + "External id": 291662,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367529370.231, "dur": 34.095, + "args": { + "External id": 291663,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367529427.718, "dur": 25.740, + "args": { + "External id": 291664,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367529459.171, "dur": 33.894, + "args": { + "External id": 291665,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367529511.620, "dur": 16.787, + "args": { + "External id": 291666,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7198 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.28)", "pid": 2070552, "tid": 2070552, + "ts": 5333367529703.835, "dur": 74.842, + "args": { + "External id": 291667,"Record function id": 0, "Ev Idx": 7199 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5333367529852.892, "dur": 45.393, + "args": { + "External id": 291668,"Record function id": 0, "Ev Idx": 7200 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.29)", "pid": 2070552, "tid": 2070552, + "ts": 5333367529906.993, "dur": 18459.812, + "args": { + "External id": 291669,"Record function id": 0, "Ev Idx": 7201 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.29)", "pid": 2070552, "tid": 2070552, + "ts": 5333367529915.227, "dur": 825.846, + "args": { + "External id": 291670,"Record function id": 0, "Ev Idx": 7202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367529995.577, "dur": 9.214, + "args": { + "External id": 291671,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367530019.232, "dur": 35.322, + "args": { + "External id": 291672,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367530024.790, "dur": 2.540, + "args": { + "External id": 291673,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367530031.847, "dur": 0.765, + "args": { + "External id": 291674,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367530033.652, "dur": 0.544, + "args": { + "External id": 291675,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367530035.360, "dur": 0.726, + "args": { + "External id": 291676,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367530039.646, "dur": 0.520, + "args": { + "External id": 291677,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367530041.515, "dur": 0.505, + "args": { + "External id": 291678,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367530042.886, "dur": 2.345, + "args": { + "External id": 291679,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367530046.519, "dur": 0.614, + "args": { + "External id": 291680,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367530048.239, "dur": 0.639, + "args": { + "External id": 291681,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367530065.196, "dur": 40.935, + "args": { + "External id": 291682,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5333367530139.173, "dur": 135.027, + "args": { + "External id": 291683,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367530149.882, "dur": 4.113, + "args": { + "External id": 291684,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5333367530158.814, "dur": 29.037, + "args": { + "External id": 291685,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367530163.175, "dur": 24.197, + "args": { + "External id": 291686,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367530184.222, "dur": 1.056, + "args": { + "External id": 291687,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367530195.472, "dur": 30.138, + "args": { + "External id": 291688,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367530197.877, "dur": 2.865, + "args": { + "External id": 291689,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367530202.106, "dur": 0.924, + "args": { + "External id": 291690,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367530204.393, "dur": 0.482, + "args": { + "External id": 291691,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367530208.513, "dur": 0.631, + "args": { + "External id": 291692,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367530210.334, "dur": 0.433, + "args": { + "External id": 291693,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367530212.307, "dur": 0.679, + "args": { + "External id": 291694,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367530215.997, "dur": 0.745, + "args": { + "External id": 291695,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367530217.768, "dur": 0.658, + "args": { + "External id": 291696,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367530219.368, "dur": 2.340, + "args": { + "External id": 291697,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367530240.832, "dur": 25.092, + "args": { + "External id": 291698,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5333367530329.531, "dur": 283.480, + "args": { + "External id": 291699,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367530359.020, "dur": 249.992, + "args": { + "External id": 291700,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7232, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5333367530368.887, "dur": 234.900, + "args": { + "External id": 291701,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367530670.470, "dur": 3.246, + "args": { + "External id": 291702,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7234, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.29)", "pid": 2070552, "tid": 2070552, + "ts": 5333367530760.214, "dur": 17392.630, + "args": { + "External id": 291703,"Record function id": 0, "Ev Idx": 7235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367530857.273, "dur": 5.978, + "args": { + "External id": 291704,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367530866.456, "dur": 1.271, + "args": { + "External id": 291705,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367530869.378, "dur": 1.431, + "args": { + "External id": 291706,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367530872.605, "dur": 1.004, + "args": { + "External id": 291707,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367530875.007, "dur": 1.200, + "args": { + "External id": 291708,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367530877.441, "dur": 0.953, + "args": { + "External id": 291709,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367530881.780, "dur": 1.091, + "args": { + "External id": 291710,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367530884.421, "dur": 2.415, + "args": { + "External id": 291711,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367530888.241, "dur": 1.062, + "args": { + "External id": 291712,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367530890.960, "dur": 1.057, + "args": { + "External id": 291713,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367530922.094, "dur": 17183.434, + "args": { + "External id": 291714,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367530937.661, "dur": 17159.531, + "args": { + "External id": 291715,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367530953.099, "dur": 14.507, + "args": { + "External id": 291716,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333367530971.295, "dur": 17092.059, + "args": { + "External id": 291717,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367530973.837, "dur": 17088.715, + "args": { + "External id": 291718,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367530979.609, "dur": 6.376, + "args": { + "External id": 291719,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367530987.484, "dur": 17071.584, + "args": { + "External id": 291720,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367548311.011, "dur": 30.645, + "args": { + "External id": 291721,"Sequence number": 1209219, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7253 + } + }, + { + "ph": "s", "id": 13, "pid": 2070552, "tid": 2070552, "ts": 5333367548311.011, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5333367548327.009, "dur": 9.877, + "args": { + "External id": 291722,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367548331.001, "dur": 5.442, + "args": { + "External id": 291723,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5333367548404.459, "dur": 83.910, + "args": { + "External id": 291724,"Record function id": 0, "Ev Idx": 7256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5333367548489.832, "dur": 1076.474, + "args": { + "External id": 291725,"Record function id": 0, "Ev Idx": 7257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367548530.777, "dur": 1021.368, + "args": { + "External id": 291726,"Sequence number": 1209220, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7258 + } + }, + { + "ph": "s", "id": 12, "pid": 2070552, "tid": 2070552, "ts": 5333367548530.777, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367548597.241, "dur": 91.230, + "args": { + "External id": 291727,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367548707.860, "dur": 105.521, + "args": { + "External id": 291728,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367548823.032, "dur": 38.922, + "args": { + "External id": 291729,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367548868.924, "dur": 30.918, + "args": { + "External id": 291730,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367548929.949, "dur": 23.945, + "args": { + "External id": 291731,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367548969.438, "dur": 16.210, + "args": { + "External id": 291732,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5333367549002.528, "dur": 127.880, + "args": { + "External id": 291733,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367549053.007, "dur": 11.650, + "args": { + "External id": 291734,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367549057.691, "dur": 6.111, + "args": { + "External id": 291735,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367549067.476, "dur": 4.130, + "args": { + "External id": 291736,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367549072.797, "dur": 2.897, + "args": { + "External id": 291737,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367549078.352, "dur": 3.018, + "args": { + "External id": 291738,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367549140.969, "dur": 63.086, + "args": { + "External id": 291739,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5333367549241.280, "dur": 29.499, + "args": { + "External id": 291740,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367549280.747, "dur": 43.637, + "args": { + "External id": 291741,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367549331.733, "dur": 35.577, + "args": { + "External id": 291742,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367549390.586, "dur": 22.754, + "args": { + "External id": 291743,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367549421.848, "dur": 32.951, + "args": { + "External id": 291744,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367549471.541, "dur": 17.987, + "args": { + "External id": 291745,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7277 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.29)", "pid": 2070552, "tid": 2070552, + "ts": 5333367549665.756, "dur": 77.067, + "args": { + "External id": 291746,"Record function id": 0, "Ev Idx": 7278 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5333367549818.525, "dur": 48.147, + "args": { + "External id": 291747,"Record function id": 0, "Ev Idx": 7279 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.30)", "pid": 2070552, "tid": 2070552, + "ts": 5333367549875.964, "dur": 18506.591, + "args": { + "External id": 291748,"Record function id": 0, "Ev Idx": 7280 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.30)", "pid": 2070552, "tid": 2070552, + "ts": 5333367549884.702, "dur": 813.699, + "args": { + "External id": 291749,"Record function id": 0, "Ev Idx": 7281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367549968.285, "dur": 8.429, + "args": { + "External id": 291750,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367549991.202, "dur": 36.117, + "args": { + "External id": 291751,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367549996.825, "dur": 2.675, + "args": { + "External id": 291752,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367550003.537, "dur": 0.875, + "args": { + "External id": 291753,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367550005.669, "dur": 0.729, + "args": { + "External id": 291754,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367550007.345, "dur": 0.434, + "args": { + "External id": 291755,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367550010.920, "dur": 0.683, + "args": { + "External id": 291756,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367550012.523, "dur": 0.720, + "args": { + "External id": 291757,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367550014.544, "dur": 2.154, + "args": { + "External id": 291758,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367550017.951, "dur": 0.809, + "args": { + "External id": 291759,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367550020.333, "dur": 0.741, + "args": { + "External id": 291760,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367550038.604, "dur": 42.063, + "args": { + "External id": 291761,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5333367550112.768, "dur": 129.269, + "args": { + "External id": 291762,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367550123.248, "dur": 3.854, + "args": { + "External id": 291763,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5333367550132.197, "dur": 9.956, + "args": { + "External id": 291764,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367550136.318, "dur": 5.410, + "args": { + "External id": 291765,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367550139.735, "dur": 0.749, + "args": { + "External id": 291766,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367550148.117, "dur": 45.035, + "args": { + "External id": 291767,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367550150.087, "dur": 2.768, + "args": { + "External id": 291768,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367550154.083, "dur": 0.586, + "args": { + "External id": 291769,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367550155.822, "dur": 0.860, + "args": { + "External id": 291770,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367550159.421, "dur": 0.641, + "args": { + "External id": 291771,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367550161.446, "dur": 0.596, + "args": { + "External id": 291772,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367550162.967, "dur": 0.436, + "args": { + "External id": 291773,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367550182.008, "dur": 0.784, + "args": { + "External id": 291774,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367550184.726, "dur": 0.640, + "args": { + "External id": 291775,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367550186.702, "dur": 1.918, + "args": { + "External id": 291776,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367550207.407, "dur": 25.531, + "args": { + "External id": 291777,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5333367550296.398, "dur": 280.015, + "args": { + "External id": 291778,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367550326.196, "dur": 245.965, + "args": { + "External id": 291779,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7311, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5333367550336.324, "dur": 230.768, + "args": { + "External id": 291780,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367550596.438, "dur": 2.403, + "args": { + "External id": 291781,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7313, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.30)", "pid": 2070552, "tid": 2070552, + "ts": 5333367550720.123, "dur": 17440.317, + "args": { + "External id": 291782,"Record function id": 0, "Ev Idx": 7314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367550813.466, "dur": 6.038, + "args": { + "External id": 291783,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367550823.151, "dur": 1.261, + "args": { + "External id": 291784,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367550826.133, "dur": 1.114, + "args": { + "External id": 291785,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367550828.943, "dur": 1.366, + "args": { + "External id": 291786,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367550832.029, "dur": 1.383, + "args": { + "External id": 291787,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367550834.785, "dur": 1.069, + "args": { + "External id": 291788,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367550839.651, "dur": 1.092, + "args": { + "External id": 291789,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367550842.029, "dur": 1.935, + "args": { + "External id": 291790,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367550845.587, "dur": 0.922, + "args": { + "External id": 291791,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367550848.052, "dur": 0.992, + "args": { + "External id": 291792,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367550867.862, "dur": 17245.459, + "args": { + "External id": 291793,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367550883.832, "dur": 17221.470, + "args": { + "External id": 291794,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367550901.103, "dur": 13.821, + "args": { + "External id": 291795,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333367550918.775, "dur": 17151.773, + "args": { + "External id": 291796,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367550921.305, "dur": 17148.351, + "args": { + "External id": 291797,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367550927.437, "dur": 6.575, + "args": { + "External id": 291798,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367550935.689, "dur": 17130.800, + "args": { + "External id": 291799,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367568322.175, "dur": 34.608, + "args": { + "External id": 291800,"Sequence number": 1209221, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7332 + } + }, + { + "ph": "s", "id": 11, "pid": 2070552, "tid": 2070552, "ts": 5333367568322.175, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5333367568341.898, "dur": 9.805, + "args": { + "External id": 291801,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367568345.580, "dur": 5.712, + "args": { + "External id": 291802,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5333367568420.768, "dur": 82.799, + "args": { + "External id": 291803,"Record function id": 0, "Ev Idx": 7335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5333367568505.472, "dur": 1051.206, + "args": { + "External id": 291804,"Record function id": 0, "Ev Idx": 7336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367568546.476, "dur": 997.111, + "args": { + "External id": 291805,"Sequence number": 1209222, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7337 + } + }, + { + "ph": "s", "id": 10, "pid": 2070552, "tid": 2070552, "ts": 5333367568546.476, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367568611.728, "dur": 85.609, + "args": { + "External id": 291806,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367568713.393, "dur": 106.113, + "args": { + "External id": 291807,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367568828.756, "dur": 37.090, + "args": { + "External id": 291808,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367568874.857, "dur": 30.727, + "args": { + "External id": 291809,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367568932.274, "dur": 23.488, + "args": { + "External id": 291810,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367568972.502, "dur": 14.502, + "args": { + "External id": 291811,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5333367569005.827, "dur": 122.898, + "args": { + "External id": 291812,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367569052.590, "dur": 11.612, + "args": { + "External id": 291813,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367569057.397, "dur": 6.012, + "args": { + "External id": 291814,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367569066.775, "dur": 4.395, + "args": { + "External id": 291815,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367569072.287, "dur": 1.397, + "args": { + "External id": 291816,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367569076.131, "dur": 4.254, + "args": { + "External id": 291817,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367569138.369, "dur": 60.238, + "args": { + "External id": 291818,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5333367569232.891, "dur": 27.231, + "args": { + "External id": 291819,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367569268.522, "dur": 42.694, + "args": { + "External id": 291820,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367569319.983, "dur": 34.227, + "args": { + "External id": 291821,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367569377.969, "dur": 23.936, + "args": { + "External id": 291822,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367569408.831, "dur": 34.248, + "args": { + "External id": 291823,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367569462.426, "dur": 17.026, + "args": { + "External id": 291824,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7356 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.30)", "pid": 2070552, "tid": 2070552, + "ts": 5333367569655.323, "dur": 78.438, + "args": { + "External id": 291825,"Record function id": 0, "Ev Idx": 7357 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::cast_forward_inputs", "pid": 2070552, "tid": 2070552, + "ts": 5333367569810.255, "dur": 45.898, + "args": { + "External id": 291826,"Record function id": 0, "Ev Idx": 7358 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::pre_forward (model.layers.31)", "pid": 2070552, "tid": 2070552, + "ts": 5333367569865.814, "dur": 18483.686, + "args": { + "External id": 291827,"Record function id": 0, "Ev Idx": 7359 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather (model.layers.31)", "pid": 2070552, "tid": 2070552, + "ts": 5333367569873.037, "dur": 841.105, + "args": { + "External id": 291828,"Record function id": 0, "Ev Idx": 7360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367569954.026, "dur": 8.948, + "args": { + "External id": 291829,"Record function id": 0, "Concrete Inputs": ["[6423040]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367569977.222, "dur": 34.234, + "args": { + "External id": 291830,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367569982.519, "dur": 2.306, + "args": { + "External id": 291831,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367569988.484, "dur": 0.563, + "args": { + "External id": 291832,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "256"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367569990.384, "dur": 0.831, + "args": { + "External id": 291833,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "524544"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367569992.959, "dur": 0.810, + "args": { + "External id": 291834,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1048832"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367569996.257, "dur": 0.588, + "args": { + "External id": 291835,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "1573120"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367569998.243, "dur": 0.727, + "args": { + "External id": 291836,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "2097408"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367570000.383, "dur": 1.481, + "args": { + "External id": 291837,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "2097664"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367570003.166, "dur": 0.960, + "args": { + "External id": 291838,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "3539456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367570005.044, "dur": 0.738, + "args": { + "External id": 291839,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "4981248"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367570022.635, "dur": 38.350, + "args": { + "External id": 291840,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::all_gather_copy_in", "pid": 2070552, "tid": 2070552, + "ts": 5333367570118.443, "dur": 126.880, + "args": { + "External id": 291841,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "6423040", "8", "5", "15", ""], "Input type": ["TensorList", "ScalarList", "Scalar", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [], [], [], [], [], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [], [], [], [], [], []], "Ev Idx": 7373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367570129.658, "dur": 5.123, + "args": { + "External id": 291842,"Record function id": 0, "Concrete Inputs": ["[51384320]", "15", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::narrow", "pid": 2070552, "tid": 2070552, + "ts": 5333367570139.934, "dur": 10.053, + "args": { + "External id": 291843,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "6423040"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367570144.199, "dur": 5.273, + "args": { + "External id": 291844,"Record function id": 0, "Concrete Inputs": ["", "0", "32115200", "38538240", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[51384320], [], [], [], []], "Ev Idx": 7376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367570147.483, "dur": 0.809, + "args": { + "External id": 291845,"Record function id": 0, "Concrete Inputs": ["", "[6423040]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[51384320], [], [], []], "Ev Idx": 7377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes", "pid": 2070552, "tid": 2070552, + "ts": 5333367570156.466, "dur": 39.527, + "args": { + "External id": 291846,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar"], "Input Strides": [[1], [], []], "Input Dims": [[6423040], [], []], "Ev Idx": 7378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367570157.901, "dur": 0.499, + "args": { + "External id": 291847,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "32115200"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367570159.743, "dur": 1.859, + "args": { + "External id": 291848,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32115456"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367570162.730, "dur": 0.529, + "args": { + "External id": 291849,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "32639744"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367570164.486, "dur": 0.589, + "args": { + "External id": 291850,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33164032"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367570182.203, "dur": 0.935, + "args": { + "External id": 291851,"Record function id": 0, "Concrete Inputs": ["", "[524288]", "[1]", "33688320"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367570185.318, "dur": 0.504, + "args": { + "External id": 291852,"Record function id": 0, "Concrete Inputs": ["", "[256]", "[1]", "34212608"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367570186.479, "dur": 0.619, + "args": { + "External id": 291853,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "34212864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367570190.325, "dur": 0.636, + "args": { + "External id": 291854,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "35654656"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367570191.705, "dur": 0.477, + "args": { + "External id": 291855,"Record function id": 0, "Concrete Inputs": ["", "[1441792]", "[1]", "37096448"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[6423040], [], [], []], "Ev Idx": 7387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367570212.006, "dur": 25.212, + "args": { + "External id": 291856,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["TensorList", "TensorList", "Scalar"], "Input Strides": [[[1], [1], [1], [1], [1], [1], [1], [1], [1]], [[1], [1], [1], [1], [1], [1], [1], [1], [1]], []], "Input Dims": [[[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], [[256], [524288], [524288], [524288], [524288], [256], [1441792], [1441792], [1441792]], []], "Ev Idx": 7388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::_allgather_base_", "pid": 2070552, "tid": 2070552, + "ts": 5333367570300.547, "dur": 282.725, + "args": { + "External id": 291857,"Record function id": 0, "Concrete Inputs": ["", "", "", "False", "-1"], "Input type": ["c10::BFloat16", "c10::BFloat16", "", "Scalar", "Scalar"], "Input Strides": [[1], [1], [], [], []], "Input Dims": [[51384320], [6423040], [], [], []], "Ev Idx": 7389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367570330.548, "dur": 248.284, + "args": { + "External id": 291858,"Record function id": 0, "Collective name": "_allgather_base", "Process Group Description": "default_pg", "dtype": "BFloat16", "Rank": 5, "Input Strides": [[1], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 51384320, "Process Group Name": "0", "Input type": ["c10::BFloat16", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[6423040], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 7390, "In msg nelems": 6423040 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:_all_gather_base", "pid": 2070552, "tid": 2070552, + "ts": 5333367570340.398, "dur": 233.187, + "args": { + "External id": 291859,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[1]], "Input Dims": [[6423040]], "Ev Idx": 7391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333367570604.713, "dur": 2.149, + "args": { + "External id": 291860,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 7392, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::all_gather_copy_out (model.layers.31)", "pid": 2070552, "tid": 2070552, + "ts": 5333367570736.037, "dur": 17394.076, + "args": { + "External id": 291861,"Record function id": 0, "Ev Idx": 7393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367570845.741, "dur": 6.137, + "args": { + "External id": 291862,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[51384320], []], "Ev Idx": 7394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367570855.595, "dur": 1.363, + "args": { + "External id": 291863,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367570858.834, "dur": 1.026, + "args": { + "External id": 291864,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367570861.750, "dur": 0.994, + "args": { + "External id": 291865,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367570864.305, "dur": 1.557, + "args": { + "External id": 291866,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367570867.048, "dur": 1.171, + "args": { + "External id": 291867,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[4194304], []], "Ev Idx": 7399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367570872.069, "dur": 1.052, + "args": { + "External id": 291868,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[2048], []], "Ev Idx": 7400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367570874.539, "dur": 1.885, + "args": { + "External id": 291869,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367570877.956, "dur": 0.852, + "args": { + "External id": 291870,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367570880.438, "dur": 1.031, + "args": { + "External id": 291871,"Record function id": 0, "Concrete Inputs": ["", "[8, -1]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[11534336], []], "Ev Idx": 7403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "fsdp::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367570900.987, "dur": 17181.869, + "args": { + "External id": 291872,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::split_with_sizes_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367570916.539, "dur": 17158.848, + "args": { + "External id": 291873,"Record function id": 0, "Concrete Inputs": ["", "[256, 524288, 524288, 524288, 524288, 256, 1441792, 1441792, 1441792]", "1", ""], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "TensorList"], "Input Strides": [[6423040, 1], [], [], [[256, 1], [524288, 1], [524288, 1], [524288, 1], [524288, 1], [256, 1], [1441792, 1], [1441792, 1], [1441792, 1]]], "Input Dims": [[8, 6423040], [], [], [[8, 256], [8, 524288], [8, 524288], [8, 524288], [8, 524288], [8, 256], [8, 1441792], [8, 1441792], [8, 1441792]]], "Ev Idx": 7405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367570936.810, "dur": 14.475, + "args": { + "External id": 291874,"Record function id": 0, "Concrete Inputs": ["[2133]", "4", "", "", "True", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333367570955.129, "dur": 17087.306, + "args": { + "External id": 291875,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", "False", ""], "Input type": ["long int", "", "", "", "", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], [], []], "Ev Idx": 7407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367570957.478, "dur": 17084.136, + "args": { + "External id": 291876,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "True", ""], "Input type": ["long int", "", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[2133], [], [], [], [], [], []], "Ev Idx": 7408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367570962.479, "dur": 6.033, + "args": { + "External id": 291877,"Record function id": 0, "Concrete Inputs": ["[2133]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367570970.086, "dur": 17068.227, + "args": { + "External id": 291878,"Record function id": 0, "Concrete Inputs": ["", "", "True"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[2133], [2133], []], "Ev Idx": 7410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "RegisterPostBackwardFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367588288.776, "dur": 34.971, + "args": { + "External id": 291879,"Sequence number": 1209223, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048]], "Ev Idx": 7411 + } + }, + { + "ph": "s", "id": 9, "pid": 2070552, "tid": 2070552, "ts": 5333367588288.776, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5333367588309.078, "dur": 9.740, + "args": { + "External id": 291880,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [8388608, 2048, 1]], "Input Dims": [[16, 4096, 2048], [16, 4096, 2048]], "Ev Idx": 7412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367588312.834, "dur": 5.624, + "args": { + "External id": 291881,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5333367588387.025, "dur": 84.234, + "args": { + "External id": 291882,"Record function id": 0, "Ev Idx": 7414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 1/1", "pid": 2070552, "tid": 2070552, + "ts": 5333367588472.704, "dur": 1049.321, + "args": { + "External id": 291883,"Record function id": 0, "Ev Idx": 7415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367588514.412, "dur": 994.031, + "args": { + "External id": 291884,"Sequence number": 1209224, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "", "", "", "", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[8388608, 2048, 1], [1], [2048, 1], [2048, 1], [2048, 1], [32, 1], [32, 1], [2048, 1], [1], [2048, 1], [2048, 1], [5632, 1]], "Input Dims": [[16, 4096, 2048], [2048], [2048, 2048], [2048, 2048], [2048, 2048], [8192, 32], [8192, 32], [2048, 2048], [2048], [5632, 2048], [5632, 2048], [2048, 5632]], "Ev Idx": 7416 + } + }, + { + "ph": "s", "id": 8, "pid": 2070552, "tid": 2070552, "ts": 5333367588514.412, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367588580.327, "dur": 78.056, + "args": { + "External id": 291885,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367588675.437, "dur": 104.520, + "args": { + "External id": 291886,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367588790.885, "dur": 36.402, + "args": { + "External id": 291887,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367588833.838, "dur": 30.207, + "args": { + "External id": 291888,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367588896.331, "dur": 25.580, + "args": { + "External id": 291889,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "rotary_embedding_kernel_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367588937.685, "dur": 14.097, + "args": { + "External id": 291890,"kernel_hash": "cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "0", "16", "4096", "32", "64", "32", "8192", "32", "64", "False", "False", "False", "False"], "kernel_file": "/tmp/torchinductor_cvm/nv/cnv33h26fy2rthmzs354f4vz2xgtydpamfgzexzicshgttcje2uh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [32, 1], [32, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [8192, 32], [8192, 32], [16, 4096, 32, 64], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 7422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "flash_attn::_flash_attn_forward", "pid": 2070552, "tid": 2070552, + "ts": 5333367588970.359, "dur": 124.986, + "args": { + "External id": 291891,"Record function id": 0, "Concrete Inputs": ["", "", "", "0.", "0.125", "True", "-1", "-1", "0.", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [8388608, 2048, 64, 1], [], [], [], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [16, 4096, 32, 64], [16, 4096, 32, 64], [], [], [], [], [], [], [], []], "Ev Idx": 7423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367589019.634, "dur": 11.134, + "args": { + "External id": 291892,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["c10::BFloat16", "", "", "", "", ""], "Input Strides": [[8388608, 2048, 64, 1], [], [], [], [], []], "Input Dims": [[16, 4096, 32, 64], [], [], [], [], []], "Ev Idx": 7424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367589024.671, "dur": 5.315, + "args": { + "External id": 291893,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32, 64]", "[8388608, 2048, 64, 1]", "15", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367589033.149, "dur": 4.341, + "args": { + "External id": 291894,"Record function id": 0, "Concrete Inputs": ["[16, 32, 4096]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367589038.639, "dur": 1.126, + "args": { + "External id": 291895,"Record function id": 0, "Concrete Inputs": ["[0]", "15", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367589043.890, "dur": 3.192, + "args": { + "External id": 291896,"Record function id": 0, "Concrete Inputs": ["[2]", "4", "", "", "", ""], "Input type": ["ScalarList", "Scalar", "", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367589104.553, "dur": 44.334, + "args": { + "External id": 291897,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [2048, 1]], "Input Dims": [[65536, 2048], [2048, 2048], [65536, 2048]], "Ev Idx": 7429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_2", "pid": 2070552, "tid": 2070552, + "ts": 5333367589194.527, "dur": 30.019, + "args": { + "External id": 291898,"kernel_hash": "ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "True", "True", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/ta/ctat5wbctobuszefu3ftwk3sfiuxuacq6lvf6dpkuc6c5tnywglb.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [2048, 1], [2048, 1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536, 2048], [65536, 2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367589235.031, "dur": 44.097, + "args": { + "External id": 291899,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367589285.879, "dur": 34.773, + "args": { + "External id": 291900,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048], [5632, 1]], "Input Dims": [[65536, 2048], [2048, 5632], [65536, 5632]], "Ev Idx": 7432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused__to_copy_mul_silu_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367589345.247, "dur": 26.184, + "args": { + "External id": 291901,"kernel_hash": "cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3", "grid": "grid(369098752,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "369098752"], "kernel_file": "/tmp/torchinductor_cvm/f5/cf5pe4xfa7zf53qqkc2wmazk2q52sqbkd5k2snmgharcyrbqo2l3.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[5632, 1], [5632, 1], [23068672, 5632, 1], []], "Input Dims": [[65536, 5632], [65536, 5632], [16, 4096, 5632], []], "Ev Idx": 7433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367589376.941, "dur": 33.128, + "args": { + "External id": 291902,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16"], "Input Strides": [[5632, 1], [1, 5632], [2048, 1]], "Input Dims": [[65536, 5632], [5632, 2048], [65536, 2048]], "Ev Idx": 7434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "triton_poi_fused_add_1", "pid": 2070552, "tid": 2070552, + "ts": 5333367589427.787, "dur": 17.255, + "args": { + "External id": 291903,"kernel_hash": "cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5", "grid": "grid(134217728,)", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "134217728"], "kernel_file": "/tmp/torchinductor_cvm/bs/cbs2fq4theggz42vonxbzmdxx46muozp6jzjz5jiwyis4lfh4sl5.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[8388608, 2048, 1], [2048, 1], []], "Input Dims": [[16, 4096, 2048], [65536, 2048], []], "Ev Idx": 7435 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward (model.layers.31)", "pid": 2070552, "tid": 2070552, + "ts": 5333367589585.185, "dur": 33.442, + "args": { + "External id": 291904,"Record function id": 0, "Ev Idx": 7436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "TorchDynamo Cache Lookup", "pid": 2070552, "tid": 2070552, + "ts": 5333367589727.253, "dur": 36.784, + "args": { + "External id": 291905,"Record function id": 0, "Ev Idx": 7437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Torch-Compiled Region: 2/0", "pid": 2070552, "tid": 2070552, + "ts": 5333367589765.752, "dur": 191.527, + "args": { + "External id": 291906,"Record function id": 0, "Ev Idx": 7438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "CompiledFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367589800.580, "dur": 148.953, + "args": { + "External id": 291907,"Sequence number": 1209225, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1], [8388608, 2048, 1]], "Input Dims": [[2048], [16, 4096, 2048]], "Ev Idx": 7439 + } + }, + { + "ph": "s", "id": 7, "pid": 2070552, "tid": 2070552, "ts": 5333367589800.580, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "layer_norm_fwd_kernel_0", "pid": 2070552, "tid": 2070552, + "ts": 5333367589868.951, "dur": 45.808, + "args": { + "External id": 291908,"kernel_hash": "ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh", "Record function id": 0, "stream": 0, "Concrete Inputs": ["", "", "", "", "2048", "1", "9.9999999999999995e-07", "True", "2048", "False", "False", "True", "False"], "kernel_file": "/tmp/torchinductor_cvm/kq/ckqqu7asqn5ayve7e3frzshnlhnsoes6s3p26esudcdww73ltnsh.py", "kernel_backend": "triton", "Input type": ["c10::BFloat16", "c10::BFloat16", "c10::BFloat16", "float", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], [1], [1], [], [], [], [], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 2048], [2048], [65536], [], [], [], [], [], [], [], [], []], "Ev Idx": 7440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333367590019.632, "dur": 0.757, + "args": { + "External id": 291909,"Sequence number": 1209226, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "False", "False", ""], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], [], [], [], []], "Ev Idx": 7441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367590029.681, "dur": 12.983, + "args": { + "External id": 291910,"Sequence number": 1209226, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "1", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], []], "Ev Idx": 7442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367590036.915, "dur": 2.705, + "args": { + "External id": 291911,"Record function id": 0, "Concrete Inputs": ["", "[16, 8191]", "[8192, 1]", "1"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 8192], [], [], []], "Ev Idx": 7443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367590047.722, "dur": 3.171, + "args": { + "External id": 291912,"Sequence number": 1209226, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0", "0", "9223372036854775807", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], []], "Ev Idx": 7444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367590049.517, "dur": 0.722, + "args": { + "External id": 291913,"Record function id": 0, "Concrete Inputs": ["", "[16, 8192]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 8192], [], [], []], "Ev Idx": 7445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367590052.070, "dur": 3.456, + "args": { + "External id": 291914,"Sequence number": 1209226, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "1", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], []], "Ev Idx": 7446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367590053.803, "dur": 0.652, + "args": { + "External id": 291915,"Record function id": 0, "Concrete Inputs": ["", "[16, 1]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 8192], [], [], []], "Ev Idx": 7447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::full_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367590062.504, "dur": 44.583, + "args": { + "External id": 291916,"Record function id": 0, "Concrete Inputs": ["", "-100", "", "", "", "False", ""], "Input type": ["long int", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], [], []], "Input Dims": [[16, 1], [], [], [], [], [], []], "Ev Idx": 7448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367590064.940, "dur": 12.187, + "args": { + "External id": 291917,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["long int", "", "", "", "Scalar", ""], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[16, 1], [], [], [], [], []], "Ev Idx": 7449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367590070.712, "dur": 5.731, + "args": { + "External id": 291918,"Record function id": 0, "Concrete Inputs": ["[16, 1]", "[1, 1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2070552, "tid": 2070552, + "ts": 5333367590079.279, "dur": 27.382, + "args": { + "External id": 291919,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1, 1], []], "Input Dims": [[16, 1], []], "Ev Idx": 7451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::cat", "pid": 2070552, "tid": 2070552, + "ts": 5333367590114.974, "dur": 30.316, + "args": { + "External id": 291920,"Sequence number": 1209226, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[[8192, 1], [1, 1]], []], "Input Dims": [[[16, 8191], [16, 1]], []], "Ev Idx": 7452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367590152.216, "dur": 3.727, + "args": { + "External id": 291921,"Sequence number": 1209226, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "1", "0", "4096", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[8192, 1], [], [], [], []], "Input Dims": [[16, 8192], [], [], [], []], "Ev Idx": 7453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367590154.143, "dur": 1.075, + "args": { + "External id": 291922,"Record function id": 0, "Concrete Inputs": ["", "[16, 4096]", "[8192, 1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[8192, 1], [], [], []], "Input Dims": [[16, 8192], [], [], []], "Ev Idx": 7454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::contiguous", "pid": 2070552, "tid": 2070552, + "ts": 5333367590160.677, "dur": 55.882, + "args": { + "External id": 291923,"Sequence number": 1209226, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 7455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2070552, "tid": 2070552, + "ts": 5333367590163.230, "dur": 53.093, + "args": { + "External id": 291924,"Sequence number": 1209226, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["long int", "Scalar"], "Input Strides": [[8192, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 7456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367590165.400, "dur": 25.779, + "args": { + "External id": 291925,"Record function id": 0, "Concrete Inputs": ["", "4", "0", "", "", "0"], "Input type": ["long int", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[8192, 1], [], [], [], [], []], "Input Dims": [[16, 4096], [], [], [], [], []], "Ev Idx": 7457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367590186.202, "dur": 4.362, + "args": { + "External id": 291926,"Record function id": 0, "Concrete Inputs": ["[16, 4096]", "4", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367590192.670, "dur": 23.167, + "args": { + "External id": 291927,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "long int", "Scalar"], "Input Strides": [[4096, 1], [8192, 1], []], "Input Dims": [[16, 4096], [16, 4096], []], "Ev Idx": 7459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367590243.346, "dur": 7.287, + "args": { + "External id": 291928,"Sequence number": 1209226, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7460 + } + }, + { + "ph": "s", "id": 6, "pid": 2070552, "tid": 2070552, "ts": 5333367590243.346, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367590253.256, "dur": 1.125, + "args": { + "External id": 291929,"Sequence number": 1209227, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["long int", "ScalarList"], "Input Strides": [[4096, 1], []], "Input Dims": [[16, 4096], []], "Ev Idx": 7461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearCrossEntropyFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367590283.083, "dur": 18766.447, + "args": { + "External id": 291930,"Sequence number": 1209227, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "0.", "1.", "8"], "Input type": ["c10::BFloat16", "long int", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [1], [2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [65536], [32000, 2048], [], [], [], [], []], "Ev Idx": 7462 + } + }, + { + "ph": "s", "id": 5, "pid": 2070552, "tid": 2070552, "ts": 5333367590283.083, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367590314.726, "dur": 31.044, + "args": { + "External id": 291931,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 7463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367590316.095, "dur": 8.831, + "args": { + "External id": 291932,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 7464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367590318.508, "dur": 6.086, + "args": { + "External id": 291933,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2070552, "tid": 2070552, + "ts": 5333367590326.938, "dur": 18.282, + "args": { + "External id": 291934,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 7466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2070552, "tid": 2070552, + "ts": 5333367590327.966, "dur": 16.637, + "args": { + "External id": 291935,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 7467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367590349.578, "dur": 21.064, + "args": { + "External id": 291936,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], []], "Ev Idx": 7468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367590350.623, "dur": 4.425, + "args": { + "External id": 291937,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], []], "Ev Idx": 7469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367590352.191, "dur": 2.604, + "args": { + "External id": 291938,"Record function id": 0, "Concrete Inputs": ["[32000, 2048]", "[2048, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2070552, "tid": 2070552, + "ts": 5333367590355.752, "dur": 14.683, + "args": { + "External id": 291939,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2070552, "tid": 2070552, + "ts": 5333367590358.221, "dur": 11.922, + "args": { + "External id": 291940,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[2048, 1], []], "Input Dims": [[32000, 2048], []], "Ev Idx": 7472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2070552, "tid": 2070552, + "ts": 5333367590377.209, "dur": 16.480, + "args": { + "External id": 291941,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 7473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367590378.632, "dur": 3.311, + "args": { + "External id": 291942,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2070552, "tid": 2070552, + "ts": 5333367590382.762, "dur": 10.646, + "args": { + "External id": 291943,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[65536]], "Ev Idx": 7475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2070552, "tid": 2070552, + "ts": 5333367590383.568, "dur": 9.526, + "args": { + "External id": 291944,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 7476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2070552, "tid": 2070552, + "ts": 5333367590400.170, "dur": 26.036, + "args": { + "External id": 291945,"Record function id": 0, "Concrete Inputs": ["", "-100"], "Input type": ["long int", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 7477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5333367590430.198, "dur": 54.796, + "args": { + "External id": 291946,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["bool", ""], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 7478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5333367590433.043, "dur": 51.580, + "args": { + "External id": 291947,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["bool", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367590439.242, "dur": 0.925, + "args": { + "External id": 291948,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["long int", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 7480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333367590441.400, "dur": 26.068, + "args": { + "External id": 291949,"Record function id": 0, "Concrete Inputs": ["", "4", "False", "False", ""], "Input type": ["bool", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367590445.274, "dur": 21.888, + "args": { + "External id": 291950,"Record function id": 0, "Concrete Inputs": ["", "4", "", "", "", "False", ""], "Input type": ["bool", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[1], [], [], [], [], [], []], "Input Dims": [[65536], [], [], [], [], [], []], "Ev Idx": 7482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367590448.201, "dur": 2.768, + "args": { + "External id": 291951,"Record function id": 0, "Concrete Inputs": ["[65536]", "[1]", "4", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367590452.140, "dur": 14.663, + "args": { + "External id": 291952,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["long int", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[65536], [65536], []], "Ev Idx": 7484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2070552, "tid": 2070552, + "ts": 5333367590489.721, "dur": 12825.303, + "args": { + "External id": 291953,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 7485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2070552, "tid": 2070552, + "ts": 5333367590491.274, "dur": 12822.876, + "args": { + "External id": 291954,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["long int"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 7486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367603324.857, "dur": 7.864, + "args": { + "External id": 291955,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367603329.590, "dur": 1.297, + "args": { + "External id": 291956,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070552, "tid": 2070552, + "ts": 5333367603339.371, "dur": 114.907, + "args": { + "External id": 291957,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 7489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5333367603343.906, "dur": 7.104, + "args": { + "External id": 291958,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5333367603347.082, "dur": 2.938, + "args": { + "External id": 291959,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 7491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367603348.974, "dur": 0.747, + "args": { + "External id": 291960,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 7492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5333367603353.136, "dur": 100.367, + "args": { + "External id": 291961,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367603356.023, "dur": 96.523, + "args": { + "External id": 291962,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367603458.025, "dur": 4.832, + "args": { + "External id": 291963,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367603460.676, "dur": 0.854, + "args": { + "External id": 291964,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "0"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367603472.829, "dur": 2.659, + "args": { + "External id": 291965,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367603486.273, "dur": 6.832, + "args": { + "External id": 291966,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367603488.237, "dur": 4.603, + "args": { + "External id": 291967,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367603691.274, "dur": 214.422, + "args": { + "External id": 291968,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367603696.493, "dur": 3.682, + "args": { + "External id": 291969,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367603702.864, "dur": 202.272, + "args": { + "External id": 291970,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5333367603707.811, "dur": 0.554, + "args": { + "External id": 291971,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5333367603710.759, "dur": 26.633, + "args": { + "External id": 291972,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5333367603739.895, "dur": 5.595, + "args": { + "External id": 291973,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367603743.937, "dur": 1.149, + "args": { + "External id": 291974,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367603746.997, "dur": 24.594, + "args": { + "External id": 291975,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367603748.342, "dur": 1.372, + "args": { + "External id": 291976,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367603751.651, "dur": 19.643, + "args": { + "External id": 291977,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367603755.742, "dur": 3.842, + "args": { + "External id": 291978,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5333367603774.042, "dur": 24.220, + "args": { + "External id": 291979,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5333367603801.468, "dur": 15.763, + "args": { + "External id": 291980,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5333367603823.363, "dur": 14.191, + "args": { + "External id": 291981,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5333367603839.921, "dur": 12.995, + "args": { + "External id": 291982,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5333367603855.134, "dur": 19.553, + "args": { + "External id": 291983,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367603857.298, "dur": 1.024, + "args": { + "External id": 291984,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367603860.528, "dur": 0.844, + "args": { + "External id": 291985,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5333367603876.869, "dur": 12.525, + "args": { + "External id": 291986,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5333367603893.581, "dur": 10.052, + "args": { + "External id": 291987,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367603914.136, "dur": 2.245, + "args": { + "External id": 291988,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367603923.680, "dur": 4.514, + "args": { + "External id": 291989,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367603926.170, "dur": 0.994, + "args": { + "External id": 291990,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367604011.398, "dur": 73.934, + "args": { + "External id": 291991,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 7523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367604091.683, "dur": 4.979, + "args": { + "External id": 291992,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367604094.614, "dur": 0.857, + "args": { + "External id": 291993,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367604098.376, "dur": 28.903, + "args": { + "External id": 291994,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 7526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5333367604135.651, "dur": 7.293, + "args": { + "External id": 291995,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5333367604138.038, "dur": 4.053, + "args": { + "External id": 291996,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367604140.195, "dur": 1.648, + "args": { + "External id": 291997,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5333367604147.576, "dur": 63.746, + "args": { + "External id": 291998,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367604149.036, "dur": 61.202, + "args": { + "External id": 291999,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5333367604218.881, "dur": 17.750, + "args": { + "External id": 292000,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 7532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367604243.182, "dur": 7.657, + "args": { + "External id": 292001,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367604248.620, "dur": 0.873, + "args": { + "External id": 292002,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070552, "tid": 2070552, + "ts": 5333367604255.602, "dur": 54.071, + "args": { + "External id": 292003,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 7535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5333367604256.515, "dur": 3.847, + "args": { + "External id": 292004,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5333367604257.409, "dur": 2.261, + "args": { + "External id": 292005,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 7537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367604258.660, "dur": 0.869, + "args": { + "External id": 292006,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 7538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5333367604261.000, "dur": 48.253, + "args": { + "External id": 292007,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367604264.473, "dur": 44.297, + "args": { + "External id": 292008,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367604313.974, "dur": 5.564, + "args": { + "External id": 292009,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367604316.158, "dur": 2.323, + "args": { + "External id": 292010,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "8192"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367604326.696, "dur": 1.953, + "args": { + "External id": 292011,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367604337.533, "dur": 8.205, + "args": { + "External id": 292012,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367604341.394, "dur": 4.020, + "args": { + "External id": 292013,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367604443.207, "dur": 216.366, + "args": { + "External id": 292014,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367604445.291, "dur": 2.285, + "args": { + "External id": 292015,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367604449.364, "dur": 209.729, + "args": { + "External id": 292016,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5333367604450.904, "dur": 0.428, + "args": { + "External id": 292017,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5333367604452.964, "dur": 21.922, + "args": { + "External id": 292018,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5333367604476.687, "dur": 2.840, + "args": { + "External id": 292019,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367604478.511, "dur": 0.762, + "args": { + "External id": 292020,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367604480.704, "dur": 26.313, + "args": { + "External id": 292021,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367604482.096, "dur": 1.685, + "args": { + "External id": 292022,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367604487.411, "dur": 19.326, + "args": { + "External id": 292023,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367604492.109, "dur": 2.919, + "args": { + "External id": 292024,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5333367604508.826, "dur": 20.643, + "args": { + "External id": 292025,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5333367604531.344, "dur": 12.672, + "args": { + "External id": 292026,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5333367604546.863, "dur": 13.473, + "args": { + "External id": 292027,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5333367604562.275, "dur": 11.158, + "args": { + "External id": 292028,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5333367604575.260, "dur": 20.718, + "args": { + "External id": 292029,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367604577.042, "dur": 1.096, + "args": { + "External id": 292030,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367604582.303, "dur": 0.830, + "args": { + "External id": 292031,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5333367604597.614, "dur": 11.823, + "args": { + "External id": 292032,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5333367604610.583, "dur": 46.551, + "args": { + "External id": 292033,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367604669.375, "dur": 3.816, + "args": { + "External id": 292034,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367604683.834, "dur": 4.771, + "args": { + "External id": 292035,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367604686.898, "dur": 0.744, + "args": { + "External id": 292036,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367604764.188, "dur": 58.933, + "args": { + "External id": 292037,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 7569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367604828.587, "dur": 7.457, + "args": { + "External id": 292038,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367604833.689, "dur": 1.267, + "args": { + "External id": 292039,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367604837.603, "dur": 26.086, + "args": { + "External id": 292040,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 7572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5333367604868.879, "dur": 5.868, + "args": { + "External id": 292041,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5333367604870.828, "dur": 3.256, + "args": { + "External id": 292042,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367604872.544, "dur": 1.321, + "args": { + "External id": 292043,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5333367604877.932, "dur": 46.767, + "args": { + "External id": 292044,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367604880.612, "dur": 43.236, + "args": { + "External id": 292045,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5333367604928.879, "dur": 15.836, + "args": { + "External id": 292046,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 7578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367604950.663, "dur": 5.029, + "args": { + "External id": 292047,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367604953.988, "dur": 0.674, + "args": { + "External id": 292048,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070552, "tid": 2070552, + "ts": 5333367604960.023, "dur": 51.008, + "args": { + "External id": 292049,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 7581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5333367604961.000, "dur": 7.752, + "args": { + "External id": 292050,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5333367604962.889, "dur": 5.213, + "args": { + "External id": 292051,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 7583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367604966.832, "dur": 1.111, + "args": { + "External id": 292052,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 7584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5333367604969.597, "dur": 41.067, + "args": { + "External id": 292053,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367604970.237, "dur": 39.859, + "args": { + "External id": 292054,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367605015.148, "dur": 3.976, + "args": { + "External id": 292055,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367605017.365, "dur": 0.768, + "args": { + "External id": 292056,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "16384"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367605026.925, "dur": 1.566, + "args": { + "External id": 292057,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367605036.477, "dur": 6.182, + "args": { + "External id": 292058,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367605038.379, "dur": 3.998, + "args": { + "External id": 292059,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367605130.097, "dur": 306.778, + "args": { + "External id": 292060,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367605132.443, "dur": 2.055, + "args": { + "External id": 292061,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367605138.116, "dur": 298.384, + "args": { + "External id": 292062,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5333367605141.261, "dur": 0.323, + "args": { + "External id": 292063,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5333367605142.916, "dur": 39.485, + "args": { + "External id": 292064,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5333367605185.512, "dur": 3.970, + "args": { + "External id": 292065,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367605188.005, "dur": 1.195, + "args": { + "External id": 292066,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367605190.585, "dur": 26.309, + "args": { + "External id": 292067,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367605193.713, "dur": 1.779, + "args": { + "External id": 292068,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367605196.881, "dur": 19.599, + "args": { + "External id": 292069,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367605199.971, "dur": 2.701, + "args": { + "External id": 292070,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5333367605218.385, "dur": 36.194, + "args": { + "External id": 292071,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5333367605256.416, "dur": 42.252, + "args": { + "External id": 292072,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5333367605303.493, "dur": 38.726, + "args": { + "External id": 292073,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5333367605343.703, "dur": 31.236, + "args": { + "External id": 292074,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5333367605376.642, "dur": 26.164, + "args": { + "External id": 292075,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367605378.888, "dur": 1.014, + "args": { + "External id": 292076,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367605382.057, "dur": 0.579, + "args": { + "External id": 292077,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5333367605404.423, "dur": 15.194, + "args": { + "External id": 292078,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5333367605422.779, "dur": 12.735, + "args": { + "External id": 292079,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367605444.593, "dur": 2.097, + "args": { + "External id": 292080,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367605456.868, "dur": 3.806, + "args": { + "External id": 292081,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367605459.076, "dur": 0.750, + "args": { + "External id": 292082,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367605531.549, "dur": 54.783, + "args": { + "External id": 292083,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 7615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367605591.616, "dur": 6.833, + "args": { + "External id": 292084,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367605594.668, "dur": 2.681, + "args": { + "External id": 292085,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367605599.941, "dur": 61.064, + "args": { + "External id": 292086,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 7618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5333367605670.562, "dur": 6.440, + "args": { + "External id": 292087,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5333367605672.335, "dur": 3.668, + "args": { + "External id": 292088,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367605674.583, "dur": 1.174, + "args": { + "External id": 292089,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5333367605680.624, "dur": 45.507, + "args": { + "External id": 292090,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367605682.263, "dur": 43.402, + "args": { + "External id": 292091,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5333367605730.603, "dur": 15.450, + "args": { + "External id": 292092,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 7624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367605752.642, "dur": 6.882, + "args": { + "External id": 292093,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367605757.484, "dur": 0.846, + "args": { + "External id": 292094,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070552, "tid": 2070552, + "ts": 5333367605767.178, "dur": 48.871, + "args": { + "External id": 292095,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 7627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5333367605767.853, "dur": 3.593, + "args": { + "External id": 292096,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5333367605768.562, "dur": 2.334, + "args": { + "External id": 292097,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 7629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367605769.777, "dur": 0.767, + "args": { + "External id": 292098,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 7630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5333367605772.137, "dur": 43.349, + "args": { + "External id": 292099,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367605775.004, "dur": 39.976, + "args": { + "External id": 292100,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367605819.968, "dur": 3.615, + "args": { + "External id": 292101,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367605821.964, "dur": 0.670, + "args": { + "External id": 292102,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "24576"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367605830.316, "dur": 1.575, + "args": { + "External id": 292103,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367605840.658, "dur": 9.575, + "args": { + "External id": 292104,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367605844.578, "dur": 5.338, + "args": { + "External id": 292105,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367605947.649, "dur": 173.657, + "args": { + "External id": 292106,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367605949.699, "dur": 2.159, + "args": { + "External id": 292107,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367605953.689, "dur": 167.226, + "args": { + "External id": 292108,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5333367605955.156, "dur": 0.344, + "args": { + "External id": 292109,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5333367605956.677, "dur": 22.676, + "args": { + "External id": 292110,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5333367605980.848, "dur": 3.139, + "args": { + "External id": 292111,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367605983.085, "dur": 0.719, + "args": { + "External id": 292112,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367605984.883, "dur": 24.578, + "args": { + "External id": 292113,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367605986.089, "dur": 1.318, + "args": { + "External id": 292114,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367605990.711, "dur": 18.437, + "args": { + "External id": 292115,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367605995.400, "dur": 2.448, + "args": { + "External id": 292116,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5333367606010.765, "dur": 19.753, + "args": { + "External id": 292117,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5333367606032.485, "dur": 12.543, + "args": { + "External id": 292118,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5333367606047.713, "dur": 12.331, + "args": { + "External id": 292119,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5333367606061.982, "dur": 11.653, + "args": { + "External id": 292120,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5333367606075.399, "dur": 20.734, + "args": { + "External id": 292121,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367606077.310, "dur": 1.074, + "args": { + "External id": 292122,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367606082.345, "dur": 0.722, + "args": { + "External id": 292123,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5333367606097.668, "dur": 10.472, + "args": { + "External id": 292124,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5333367606109.292, "dur": 10.799, + "args": { + "External id": 292125,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367606128.091, "dur": 1.706, + "args": { + "External id": 292126,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367606138.503, "dur": 3.674, + "args": { + "External id": 292127,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367606141.067, "dur": 0.371, + "args": { + "External id": 292128,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367606227.001, "dur": 54.157, + "args": { + "External id": 292129,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 7661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367606287.033, "dur": 8.633, + "args": { + "External id": 292130,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367606292.793, "dur": 1.393, + "args": { + "External id": 292131,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367606297.221, "dur": 24.759, + "args": { + "External id": 292132,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 7664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5333367606326.777, "dur": 5.131, + "args": { + "External id": 292133,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5333367606328.290, "dur": 2.926, + "args": { + "External id": 292134,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367606330.095, "dur": 0.912, + "args": { + "External id": 292135,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5333367606334.785, "dur": 40.510, + "args": { + "External id": 292136,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367606337.932, "dur": 36.891, + "args": { + "External id": 292137,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5333367606379.213, "dur": 14.349, + "args": { + "External id": 292138,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 7670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367606401.392, "dur": 4.455, + "args": { + "External id": 292139,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367606403.775, "dur": 1.011, + "args": { + "External id": 292140,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070552, "tid": 2070552, + "ts": 5333367606409.980, "dur": 49.318, + "args": { + "External id": 292141,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 7673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5333367606410.893, "dur": 6.714, + "args": { + "External id": 292142,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5333367606413.451, "dur": 3.550, + "args": { + "External id": 292143,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 7675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367606414.761, "dur": 2.039, + "args": { + "External id": 292144,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 7676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5333367606418.311, "dur": 40.629, + "args": { + "External id": 292145,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367606419.250, "dur": 39.086, + "args": { + "External id": 292146,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367606463.870, "dur": 3.664, + "args": { + "External id": 292147,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367606465.867, "dur": 0.804, + "args": { + "External id": 292148,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "32768"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367606473.339, "dur": 1.441, + "args": { + "External id": 292149,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367606484.476, "dur": 5.585, + "args": { + "External id": 292150,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367606486.369, "dur": 3.431, + "args": { + "External id": 292151,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367606571.762, "dur": 217.338, + "args": { + "External id": 292152,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367606573.783, "dur": 1.953, + "args": { + "External id": 292153,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367606577.265, "dur": 211.428, + "args": { + "External id": 292154,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5333367606578.736, "dur": 0.314, + "args": { + "External id": 292155,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5333367606582.063, "dur": 18.619, + "args": { + "External id": 292156,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5333367606602.453, "dur": 2.906, + "args": { + "External id": 292157,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367606604.519, "dur": 0.624, + "args": { + "External id": 292158,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367606607.970, "dur": 60.612, + "args": { + "External id": 292159,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367606609.716, "dur": 2.944, + "args": { + "External id": 292160,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367606613.874, "dur": 54.337, + "args": { + "External id": 292161,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367606616.494, "dur": 37.677, + "args": { + "External id": 292162,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5333367606670.582, "dur": 20.233, + "args": { + "External id": 292163,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5333367606692.594, "dur": 12.969, + "args": { + "External id": 292164,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5333367606708.003, "dur": 12.523, + "args": { + "External id": 292165,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5333367606721.996, "dur": 11.398, + "args": { + "External id": 292166,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5333367606735.255, "dur": 20.956, + "args": { + "External id": 292167,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367606739.219, "dur": 1.418, + "args": { + "External id": 292168,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367606742.425, "dur": 0.915, + "args": { + "External id": 292169,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5333367606757.887, "dur": 13.016, + "args": { + "External id": 292170,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5333367606771.970, "dur": 15.761, + "args": { + "External id": 292171,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367606797.088, "dur": 2.105, + "args": { + "External id": 292172,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367606808.808, "dur": 4.015, + "args": { + "External id": 292173,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367606811.335, "dur": 0.570, + "args": { + "External id": 292174,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "32768"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367606883.086, "dur": 54.087, + "args": { + "External id": 292175,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 7707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367606942.294, "dur": 4.802, + "args": { + "External id": 292176,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367606945.012, "dur": 1.043, + "args": { + "External id": 292177,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367606948.449, "dur": 21.818, + "args": { + "External id": 292178,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 7710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5333367606974.939, "dur": 9.691, + "args": { + "External id": 292179,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5333367606976.800, "dur": 7.100, + "args": { + "External id": 292180,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367606980.569, "dur": 3.067, + "args": { + "External id": 292181,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5333367606987.786, "dur": 37.681, + "args": { + "External id": 292182,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367606988.853, "dur": 36.032, + "args": { + "External id": 292183,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5333367607029.504, "dur": 14.049, + "args": { + "External id": 292184,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 7716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367607048.544, "dur": 3.842, + "args": { + "External id": 292185,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367607050.640, "dur": 0.785, + "args": { + "External id": 292186,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070552, "tid": 2070552, + "ts": 5333367607056.385, "dur": 46.708, + "args": { + "External id": 292187,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 7719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5333367607058.987, "dur": 3.503, + "args": { + "External id": 292188,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5333367607059.865, "dur": 2.130, + "args": { + "External id": 292189,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 7721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367607061.084, "dur": 0.550, + "args": { + "External id": 292190,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 7722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5333367607063.098, "dur": 39.679, + "args": { + "External id": 292191,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367607063.871, "dur": 38.348, + "args": { + "External id": 292192,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367607106.559, "dur": 3.855, + "args": { + "External id": 292193,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367607108.870, "dur": 0.652, + "args": { + "External id": 292194,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "40960"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367607117.546, "dur": 1.367, + "args": { + "External id": 292195,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367607125.721, "dur": 5.666, + "args": { + "External id": 292196,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367607127.285, "dur": 3.832, + "args": { + "External id": 292197,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367607228.605, "dur": 174.470, + "args": { + "External id": 292198,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367607230.643, "dur": 2.902, + "args": { + "External id": 292199,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367607237.473, "dur": 165.034, + "args": { + "External id": 292200,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5333367607240.969, "dur": 0.623, + "args": { + "External id": 292201,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5333367607242.879, "dur": 20.747, + "args": { + "External id": 292202,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5333367607265.449, "dur": 4.922, + "args": { + "External id": 292203,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367607267.637, "dur": 2.399, + "args": { + "External id": 292204,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367607271.377, "dur": 20.362, + "args": { + "External id": 292205,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367607272.698, "dur": 1.435, + "args": { + "External id": 292206,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367607275.417, "dur": 16.027, + "args": { + "External id": 292207,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367607278.304, "dur": 2.627, + "args": { + "External id": 292208,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5333367607293.029, "dur": 18.139, + "args": { + "External id": 292209,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5333367607312.942, "dur": 12.148, + "args": { + "External id": 292210,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5333367607329.614, "dur": 12.327, + "args": { + "External id": 292211,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5333367607343.239, "dur": 10.893, + "args": { + "External id": 292212,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5333367607355.959, "dur": 19.438, + "args": { + "External id": 292213,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367607357.755, "dur": 0.954, + "args": { + "External id": 292214,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367607360.673, "dur": 2.331, + "args": { + "External id": 292215,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5333367607376.983, "dur": 11.385, + "args": { + "External id": 292216,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5333367607391.441, "dur": 10.185, + "args": { + "External id": 292217,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367607410.263, "dur": 1.640, + "args": { + "External id": 292218,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367607421.046, "dur": 3.351, + "args": { + "External id": 292219,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367607423.173, "dur": 0.430, + "args": { + "External id": 292220,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "40960"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367607486.021, "dur": 51.580, + "args": { + "External id": 292221,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 7753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367607542.018, "dur": 4.601, + "args": { + "External id": 292222,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367607544.867, "dur": 0.675, + "args": { + "External id": 292223,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367607548.021, "dur": 22.760, + "args": { + "External id": 292224,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 7756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5333367607577.546, "dur": 5.548, + "args": { + "External id": 292225,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5333367607579.043, "dur": 3.218, + "args": { + "External id": 292226,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367607580.792, "dur": 1.298, + "args": { + "External id": 292227,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5333367607585.404, "dur": 73.444, + "args": { + "External id": 292228,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367607586.450, "dur": 71.093, + "args": { + "External id": 292229,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5333367607664.905, "dur": 16.112, + "args": { + "External id": 292230,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 7762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367607687.405, "dur": 7.744, + "args": { + "External id": 292231,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367607692.624, "dur": 1.292, + "args": { + "External id": 292232,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070552, "tid": 2070552, + "ts": 5333367607699.279, "dur": 50.783, + "args": { + "External id": 292233,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 7765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5333367607699.957, "dur": 3.844, + "args": { + "External id": 292234,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5333367607700.766, "dur": 2.438, + "args": { + "External id": 292235,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 7767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367607702.234, "dur": 0.842, + "args": { + "External id": 292236,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 7768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5333367607704.403, "dur": 45.257, + "args": { + "External id": 292237,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367607707.065, "dur": 42.098, + "args": { + "External id": 292238,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367607754.063, "dur": 3.421, + "args": { + "External id": 292239,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367607756.077, "dur": 0.485, + "args": { + "External id": 292240,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "49152"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367607763.786, "dur": 1.732, + "args": { + "External id": 292241,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367607773.190, "dur": 6.303, + "args": { + "External id": 292242,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367607775.234, "dur": 3.931, + "args": { + "External id": 292243,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367607867.135, "dur": 168.762, + "args": { + "External id": 292244,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367607871.019, "dur": 2.216, + "args": { + "External id": 292245,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367607875.051, "dur": 160.410, + "args": { + "External id": 292246,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5333367607876.677, "dur": 0.436, + "args": { + "External id": 292247,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5333367607878.162, "dur": 18.796, + "args": { + "External id": 292248,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5333367607898.970, "dur": 4.340, + "args": { + "External id": 292249,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367607902.246, "dur": 0.823, + "args": { + "External id": 292250,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367607904.083, "dur": 19.832, + "args": { + "External id": 292251,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367607905.295, "dur": 1.210, + "args": { + "External id": 292252,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367607907.809, "dur": 15.843, + "args": { + "External id": 292253,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367607910.598, "dur": 2.560, + "args": { + "External id": 292254,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5333367607927.495, "dur": 17.649, + "args": { + "External id": 292255,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5333367607947.147, "dur": 14.343, + "args": { + "External id": 292256,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5333367607963.797, "dur": 12.468, + "args": { + "External id": 292257,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5333367607977.872, "dur": 11.105, + "args": { + "External id": 292258,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5333367607990.582, "dur": 17.829, + "args": { + "External id": 292259,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367607992.520, "dur": 1.023, + "args": { + "External id": 292260,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367607995.362, "dur": 0.627, + "args": { + "External id": 292261,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5333367608012.056, "dur": 10.818, + "args": { + "External id": 292262,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5333367608023.928, "dur": 10.492, + "args": { + "External id": 292263,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367608042.384, "dur": 1.512, + "args": { + "External id": 292264,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367608052.676, "dur": 3.563, + "args": { + "External id": 292265,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367608055.015, "dur": 0.411, + "args": { + "External id": 292266,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "49152"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367608116.229, "dur": 47.205, + "args": { + "External id": 292267,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 7799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367608184.599, "dur": 6.373, + "args": { + "External id": 292268,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367608187.895, "dur": 1.455, + "args": { + "External id": 292269,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367608194.254, "dur": 25.038, + "args": { + "External id": 292270,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 7802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5333367608224.614, "dur": 5.521, + "args": { + "External id": 292271,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5333367608225.974, "dur": 3.420, + "args": { + "External id": 292272,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367608227.801, "dur": 1.385, + "args": { + "External id": 292273,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5333367608233.276, "dur": 48.188, + "args": { + "External id": 292274,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367608234.394, "dur": 46.518, + "args": { + "External id": 292275,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5333367608285.444, "dur": 13.609, + "args": { + "External id": 292276,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 7808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367608306.729, "dur": 3.941, + "args": { + "External id": 292277,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367608308.952, "dur": 0.764, + "args": { + "External id": 292278,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070552, "tid": 2070552, + "ts": 5333367608314.446, "dur": 46.527, + "args": { + "External id": 292279,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 7811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5333367608315.575, "dur": 3.333, + "args": { + "External id": 292280,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5333367608316.164, "dur": 2.240, + "args": { + "External id": 292281,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 7813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367608317.215, "dur": 1.073, + "args": { + "External id": 292282,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 7814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5333367608321.565, "dur": 39.086, + "args": { + "External id": 292283,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367608322.151, "dur": 38.028, + "args": { + "External id": 292284,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367608364.646, "dur": 3.840, + "args": { + "External id": 292285,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["long int", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367608366.607, "dur": 1.015, + "args": { + "External id": 292286,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "57344"], "Input type": ["long int", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367608374.404, "dur": 1.654, + "args": { + "External id": 292287,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367608383.699, "dur": 8.800, + "args": { + "External id": 292288,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367608387.273, "dur": 4.923, + "args": { + "External id": 292289,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367608469.254, "dur": 218.076, + "args": { + "External id": 292290,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367608470.929, "dur": 1.941, + "args": { + "External id": 292291,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367608474.457, "dur": 212.383, + "args": { + "External id": 292292,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5333367608475.900, "dur": 0.343, + "args": { + "External id": 292293,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5333367608477.468, "dur": 21.266, + "args": { + "External id": 292294,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5333367608500.271, "dur": 3.277, + "args": { + "External id": 292295,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367608502.423, "dur": 0.711, + "args": { + "External id": 292296,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367608504.463, "dur": 25.083, + "args": { + "External id": 292297,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367608508.163, "dur": 1.794, + "args": { + "External id": 292298,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367608510.896, "dur": 18.370, + "args": { + "External id": 292299,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367608515.285, "dur": 2.212, + "args": { + "External id": 292300,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5333367608530.752, "dur": 18.408, + "args": { + "External id": 292301,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5333367608550.801, "dur": 11.267, + "args": { + "External id": 292302,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5333367608564.331, "dur": 12.175, + "args": { + "External id": 292303,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5333367608577.954, "dur": 11.193, + "args": { + "External id": 292304,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5333367608590.440, "dur": 19.144, + "args": { + "External id": 292305,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367608592.184, "dur": 0.973, + "args": { + "External id": 292306,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367608597.102, "dur": 0.839, + "args": { + "External id": 292307,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5333367608611.089, "dur": 56.429, + "args": { + "External id": 292308,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5333367608670.219, "dur": 14.640, + "args": { + "External id": 292309,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367608696.586, "dur": 2.464, + "args": { + "External id": 292310,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367608708.945, "dur": 4.184, + "args": { + "External id": 292311,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367608711.476, "dur": 0.707, + "args": { + "External id": 292312,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "57344"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367608782.550, "dur": 53.632, + "args": { + "External id": 292313,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 7845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367608843.353, "dur": 5.044, + "args": { + "External id": 292314,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367608845.847, "dur": 1.505, + "args": { + "External id": 292315,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367608849.850, "dur": 22.631, + "args": { + "External id": 292316,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 7848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5333367608876.431, "dur": 5.111, + "args": { + "External id": 292317,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5333367608878.028, "dur": 2.845, + "args": { + "External id": 292318,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367608879.845, "dur": 0.879, + "args": { + "External id": 292319,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5333367608886.349, "dur": 37.845, + "args": { + "External id": 292320,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367608887.337, "dur": 36.265, + "args": { + "External id": 292321,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5333367608928.051, "dur": 13.196, + "args": { + "External id": 292322,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 7854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5333367608946.330, "dur": 24.775, + "args": { + "External id": 292323,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 7855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5333367608948.975, "dur": 21.671, + "args": { + "External id": 292324,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367608955.571, "dur": 0.935, + "args": { + "External id": 292325,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 7857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333367608977.610, "dur": 28.940, + "args": { + "External id": 292326,"Record function id": 0, "Concrete Inputs": ["", "", "15", "False", "False", ""], "Input type": ["float", "", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], []], "Ev Idx": 7858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_to_copy", "pid": 2070552, "tid": 2070552, + "ts": 5333367608979.405, "dur": 26.841, + "args": { + "External id": 292327,"Record function id": 0, "Concrete Inputs": ["", "15", "", "", "", "False", ""], "Input type": ["float", "Scalar", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], [], []], "Ev Idx": 7859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367608984.608, "dur": 3.503, + "args": { + "External id": 292328,"Record function id": 0, "Concrete Inputs": ["[32000, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367608991.310, "dur": 14.461, + "args": { + "External id": 292329,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "float", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 7861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2070552, + "ts": 5333367609019.378, "dur": 4.710, + "args": { + "External id": 292330,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 7862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2070552, + "ts": 5333367609021.255, "dur": 2.579, + "args": { + "External id": 292331,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 7863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::detach", "pid": 2070552, "tid": 2070552, + "ts": 5333367609025.208, "dur": 0.872, + "args": { + "External id": 292332,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "detach", "pid": 2070552, "tid": 2070552, + "ts": 5333367609025.557, "dur": 0.450, + "args": { + "External id": 292333,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367609077.633, "dur": 2.845, + "args": { + "External id": 292334,"Record function id": 0, "Concrete Inputs": ["[16, 4096, 32000]", "5", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367609236.434, "dur": 7.720, + "args": { + "External id": 292335,"Sequence number": 1209228, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 2048]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[8388608, 2048, 1], []], "Input Dims": [[16, 4096, 2048], []], "Ev Idx": 7867 + } + }, + { + "ph": "s", "id": 4, "pid": 2070552, "tid": 2070552, "ts": 5333367609236.434, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367609249.746, "dur": 0.926, + "args": { + "External id": 292336,"Sequence number": 1209229, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::Half", "ScalarList"], "Input Strides": [[131072000, 32000, 1], []], "Input Dims": [[16, 4096, 32000], []], "Ev Idx": 7868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "FusedLinearListNetFunction", "pid": 2070552, "tid": 2070552, + "ts": 5333367609281.873, "dur": 8190.350, + "args": { + "External id": 292337,"Sequence number": 1209229, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "", "", "-100", "1.", "8"], "Input type": ["c10::BFloat16", "c10::Half", "c10::BFloat16", "", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [32000, 1], [2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [65536, 32000], [32000, 2048], [], [], [], []], "Ev Idx": 7869 + } + }, + { + "ph": "s", "id": 3, "pid": 2070552, "tid": 2070552, "ts": 5333367609281.873, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367609296.358, "dur": 35.125, + "args": { + "External id": 292338,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", ""], "Input type": ["c10::BFloat16", "", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 7870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367609298.301, "dur": 11.119, + "args": { + "External id": 292339,"Record function id": 0, "Concrete Inputs": ["", "15", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], [], []], "Ev Idx": 7871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367609301.738, "dur": 7.158, + "args": { + "External id": 292340,"Record function id": 0, "Concrete Inputs": ["[65536, 2048]", "[2048, 1]", "15", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2070552, "tid": 2070552, + "ts": 5333367609310.983, "dur": 20.284, + "args": { + "External id": 292341,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[65536, 2048]], "Ev Idx": 7873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2070552, "tid": 2070552, + "ts": 5333367609312.962, "dur": 17.922, + "args": { + "External id": 292342,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], []], "Input Dims": [[65536, 2048], []], "Ev Idx": 7874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367609334.823, "dur": 25.439, + "args": { + "External id": 292343,"Record function id": 0, "Concrete Inputs": ["", "6", "", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], []], "Ev Idx": 7875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367609335.834, "dur": 6.546, + "args": { + "External id": 292344,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "False", ""], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[2048, 1], [], [], [], [], []], "Input Dims": [[32000, 2048], [], [], [], [], []], "Ev Idx": 7876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367609339.711, "dur": 2.430, + "args": { + "External id": 292345,"Record function id": 0, "Concrete Inputs": ["[32000, 2048]", "[2048, 1]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2070552, "tid": 2070552, + "ts": 5333367609347.366, "dur": 12.704, + "args": { + "External id": 292346,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2070552, "tid": 2070552, + "ts": 5333367609348.034, "dur": 11.466, + "args": { + "External id": 292347,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[2048, 1], []], "Input Dims": [[32000, 2048], []], "Ev Idx": 7879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2070552, "tid": 2070552, + "ts": 5333367609366.966, "dur": 18.062, + "args": { + "External id": 292348,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False"], "Input type": ["ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 7880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367609368.634, "dur": 4.019, + "args": { + "External id": 292349,"Record function id": 0, "Concrete Inputs": ["[65536]", "6", "", "", "False", ""], "Input type": ["ScalarList", "Scalar", "", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2070552, "tid": 2070552, + "ts": 5333367609373.239, "dur": 11.496, + "args": { + "External id": 292350,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[65536]], "Ev Idx": 7882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2070552, "tid": 2070552, + "ts": 5333367609375.658, "dur": 8.692, + "args": { + "External id": 292351,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 7883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367609390.986, "dur": 35.651, + "args": { + "External id": 292352,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367609423.282, "dur": 1.715, + "args": { + "External id": 292353,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070552, "tid": 2070552, + "ts": 5333367609431.471, "dur": 87.074, + "args": { + "External id": 292354,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 7886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5333367609432.630, "dur": 5.271, + "args": { + "External id": 292355,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5333367609434.241, "dur": 2.813, + "args": { + "External id": 292356,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 7888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367609435.888, "dur": 0.918, + "args": { + "External id": 292357,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 7889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5333367609439.174, "dur": 78.562, + "args": { + "External id": 292358,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367609442.954, "dur": 73.876, + "args": { + "External id": 292359,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367609522.077, "dur": 3.497, + "args": { + "External id": 292360,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["c10::Half", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], [], [], []], "Input Dims": [[65536, 32000], [], [], [], []], "Ev Idx": 7892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367609524.181, "dur": 0.444, + "args": { + "External id": 292361,"Record function id": 0, "Concrete Inputs": ["", "[8192, 32000]", "[32000, 1]", "0"], "Input type": ["c10::Half", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[65536, 32000], [], [], []], "Ev Idx": 7893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367609529.941, "dur": 1.551, + "args": { + "External id": 292362,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367609537.959, "dur": 7.418, + "args": { + "External id": 292363,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367609541.796, "dur": 3.313, + "args": { + "External id": 292364,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367609689.344, "dur": 195.630, + "args": { + "External id": 292365,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367609692.158, "dur": 5.008, + "args": { + "External id": 292366,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367609699.073, "dur": 185.391, + "args": { + "External id": 292367,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5333367609700.758, "dur": 0.437, + "args": { + "External id": 292368,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5333367609702.828, "dur": 25.462, + "args": { + "External id": 292369,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5333367609730.083, "dur": 5.451, + "args": { + "External id": 292370,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367609734.086, "dur": 1.170, + "args": { + "External id": 292371,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367609738.532, "dur": 22.425, + "args": { + "External id": 292372,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367609739.764, "dur": 1.260, + "args": { + "External id": 292373,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367609742.396, "dur": 18.311, + "args": { + "External id": 292374,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367609745.869, "dur": 3.498, + "args": { + "External id": 292375,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5333367609762.611, "dur": 22.981, + "args": { + "External id": 292376,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5333367609787.281, "dur": 14.083, + "args": { + "External id": 292377,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5333367609806.456, "dur": 13.695, + "args": { + "External id": 292378,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5333367609821.576, "dur": 11.735, + "args": { + "External id": 292379,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5333367609835.429, "dur": 22.906, + "args": { + "External id": 292380,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367609841.486, "dur": 1.182, + "args": { + "External id": 292381,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367609845.000, "dur": 0.827, + "args": { + "External id": 292382,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5333367609859.804, "dur": 11.570, + "args": { + "External id": 292383,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5333367609872.870, "dur": 10.419, + "args": { + "External id": 292384,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367609892.425, "dur": 1.709, + "args": { + "External id": 292385,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367609904.513, "dur": 1.324, + "args": { + "External id": 292386,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::Half", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367609911.815, "dur": 3.953, + "args": { + "External id": 292387,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::Half", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367609913.486, "dur": 2.004, + "args": { + "External id": 292388,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367609999.138, "dur": 149.339, + "args": { + "External id": 292389,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367610000.549, "dur": 2.464, + "args": { + "External id": 292390,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367610004.044, "dur": 144.049, + "args": { + "External id": 292391,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5333367610005.045, "dur": 0.185, + "args": { + "External id": 292392,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5333367610008.508, "dur": 17.924, + "args": { + "External id": 292393,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5333367610027.901, "dur": 3.038, + "args": { + "External id": 292394,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367610029.963, "dur": 0.764, + "args": { + "External id": 292395,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367610031.413, "dur": 17.946, + "args": { + "External id": 292396,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367610032.201, "dur": 1.381, + "args": { + "External id": 292397,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367610034.482, "dur": 14.639, + "args": { + "External id": 292398,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367610038.056, "dur": 1.347, + "args": { + "External id": 292399,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5333367610050.169, "dur": 15.314, + "args": { + "External id": 292400,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5333367610066.786, "dur": 10.568, + "args": { + "External id": 292401,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5333367610079.525, "dur": 11.182, + "args": { + "External id": 292402,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5333367610093.516, "dur": 9.793, + "args": { + "External id": 292403,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5333367610104.471, "dur": 15.594, + "args": { + "External id": 292404,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367610105.811, "dur": 0.980, + "args": { + "External id": 292405,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367610108.284, "dur": 0.691, + "args": { + "External id": 292406,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5333367610121.030, "dur": 10.735, + "args": { + "External id": 292407,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5333367610137.624, "dur": 9.632, + "args": { + "External id": 292408,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367610154.850, "dur": 1.414, + "args": { + "External id": 292409,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070552, "tid": 2070552, + "ts": 5333367610182.863, "dur": 37.873, + "args": { + "External id": 292410,"Record function id": 0, "Concrete Inputs": ["", "inf", "", ""], "Input type": ["float", "Scalar", "", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367610185.618, "dur": 7.701, + "args": { + "External id": 292411,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["float", "", "", "", "", ""], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[8192], [], [], [], [], []], "Ev Idx": 7943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367610188.113, "dur": 4.582, + "args": { + "External id": 292412,"Record function id": 0, "Concrete Inputs": ["[8192]", "[1]", "6", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070552, "tid": 2070552, + "ts": 5333367610195.673, "dur": 24.347, + "args": { + "External id": 292413,"Record function id": 0, "Concrete Inputs": ["", "inf", "", "", ""], "Input type": ["float", "Scalar", "", "", "float"], "Input Strides": [[1], [], [], [], [1]], "Input Dims": [[8192], [], [], [], [8192]], "Ev Idx": 7945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367610228.014, "dur": 5.425, + "args": { + "External id": 292414,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 7946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367610231.247, "dur": 0.991, + "args": { + "External id": 292415,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "0"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 7947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367610311.062, "dur": 87.442, + "args": { + "External id": 292416,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 7948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367610403.848, "dur": 4.980, + "args": { + "External id": 292417,"Record function id": 0, "Concrete Inputs": ["", "0", "0", "8192", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367610406.657, "dur": 0.899, + "args": { + "External id": 292418,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "0"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367610410.701, "dur": 26.599, + "args": { + "External id": 292419,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 7951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5333367610442.643, "dur": 8.819, + "args": { + "External id": 292420,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 7952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5333367610444.593, "dur": 6.133, + "args": { + "External id": 292421,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 7953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367610449.305, "dur": 1.206, + "args": { + "External id": 292422,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 7954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5333367610454.770, "dur": 43.449, + "args": { + "External id": 292423,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367610456.061, "dur": 41.592, + "args": { + "External id": 292424,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 7956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5333367610502.752, "dur": 15.597, + "args": { + "External id": 292425,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 7957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367610524.781, "dur": 4.475, + "args": { + "External id": 292426,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 7958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367610527.096, "dur": 0.983, + "args": { + "External id": 292427,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 7959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070552, "tid": 2070552, + "ts": 5333367610535.489, "dur": 52.308, + "args": { + "External id": 292428,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 7960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5333367610536.549, "dur": 4.204, + "args": { + "External id": 292429,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 7961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5333367610537.352, "dur": 2.822, + "args": { + "External id": 292430,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 7962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367610539.432, "dur": 0.604, + "args": { + "External id": 292431,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 7963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5333367610541.315, "dur": 46.131, + "args": { + "External id": 292432,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367610542.129, "dur": 44.845, + "args": { + "External id": 292433,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 7965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367610593.638, "dur": 3.914, + "args": { + "External id": 292434,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["c10::Half", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], [], [], []], "Input Dims": [[65536, 32000], [], [], [], []], "Ev Idx": 7966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367610595.932, "dur": 0.749, + "args": { + "External id": 292435,"Record function id": 0, "Concrete Inputs": ["", "[8192, 32000]", "[32000, 1]", "262144000"], "Input type": ["c10::Half", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[65536, 32000], [], [], []], "Ev Idx": 7967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367610603.803, "dur": 1.404, + "args": { + "External id": 292436,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367610611.892, "dur": 7.526, + "args": { + "External id": 292437,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367610615.696, "dur": 3.408, + "args": { + "External id": 292438,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367610750.261, "dur": 179.322, + "args": { + "External id": 292439,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367610752.846, "dur": 5.005, + "args": { + "External id": 292440,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367610759.341, "dur": 169.749, + "args": { + "External id": 292441,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5333367610761.074, "dur": 0.350, + "args": { + "External id": 292442,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5333367610762.770, "dur": 21.949, + "args": { + "External id": 292443,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5333367610786.883, "dur": 5.163, + "args": { + "External id": 292444,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 7976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367610790.874, "dur": 0.845, + "args": { + "External id": 292445,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 7977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367610793.103, "dur": 23.321, + "args": { + "External id": 292446,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367610796.803, "dur": 1.113, + "args": { + "External id": 292447,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367610799.094, "dur": 16.970, + "args": { + "External id": 292448,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 7980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367610802.021, "dur": 3.481, + "args": { + "External id": 292449,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5333367610817.839, "dur": 20.254, + "args": { + "External id": 292450,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5333367610839.853, "dur": 12.022, + "args": { + "External id": 292451,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5333367610854.681, "dur": 12.766, + "args": { + "External id": 292452,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 7984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5333367610868.967, "dur": 11.506, + "args": { + "External id": 292453,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5333367610882.159, "dur": 21.794, + "args": { + "External id": 292454,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 7986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367610886.153, "dur": 0.820, + "args": { + "External id": 292455,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 7987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367610890.920, "dur": 0.717, + "args": { + "External id": 292456,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 7988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5333367610905.212, "dur": 10.839, + "args": { + "External id": 292457,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 7989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5333367610917.250, "dur": 10.654, + "args": { + "External id": 292458,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 7990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367610936.809, "dur": 2.998, + "args": { + "External id": 292459,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 7991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367610949.841, "dur": 1.484, + "args": { + "External id": 292460,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::Half", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 7992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367610958.479, "dur": 3.823, + "args": { + "External id": 292461,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::Half", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 7993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367610959.586, "dur": 2.476, + "args": { + "External id": 292462,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367611038.721, "dur": 166.938, + "args": { + "External id": 292463,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367611040.229, "dur": 3.070, + "args": { + "External id": 292464,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 7996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367611044.530, "dur": 160.656, + "args": { + "External id": 292465,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 7997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5333367611045.424, "dur": 0.119, + "args": { + "External id": 292466,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 7998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5333367611048.528, "dur": 19.141, + "args": { + "External id": 292467,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 7999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5333367611069.176, "dur": 2.523, + "args": { + "External id": 292468,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367611070.803, "dur": 0.676, + "args": { + "External id": 292469,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367611072.315, "dur": 18.331, + "args": { + "External id": 292470,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367611073.741, "dur": 1.472, + "args": { + "External id": 292471,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367611076.140, "dur": 14.270, + "args": { + "External id": 292472,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367611079.720, "dur": 1.226, + "args": { + "External id": 292473,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5333367611091.699, "dur": 15.046, + "args": { + "External id": 292474,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5333367611108.013, "dur": 10.258, + "args": { + "External id": 292475,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5333367611120.492, "dur": 10.944, + "args": { + "External id": 292476,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5333367611132.161, "dur": 10.684, + "args": { + "External id": 292477,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5333367611146.360, "dur": 16.391, + "args": { + "External id": 292478,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367611147.749, "dur": 1.351, + "args": { + "External id": 292479,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367611151.067, "dur": 0.615, + "args": { + "External id": 292480,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5333367611163.574, "dur": 27.003, + "args": { + "External id": 292481,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5333367611192.916, "dur": 10.886, + "args": { + "External id": 292482,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367611212.530, "dur": 1.865, + "args": { + "External id": 292483,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070552, "tid": 2070552, + "ts": 5333367611222.916, "dur": 25.602, + "args": { + "External id": 292484,"Record function id": 0, "Concrete Inputs": ["", "inf", "", ""], "Input type": ["float", "Scalar", "", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367611225.690, "dur": 6.960, + "args": { + "External id": 292485,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["float", "", "", "", "", ""], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[8192], [], [], [], [], []], "Ev Idx": 8017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367611228.088, "dur": 4.139, + "args": { + "External id": 292486,"Record function id": 0, "Concrete Inputs": ["[8192]", "[1]", "6", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070552, "tid": 2070552, + "ts": 5333367611234.720, "dur": 13.150, + "args": { + "External id": 292487,"Record function id": 0, "Concrete Inputs": ["", "inf", "", "", ""], "Input type": ["float", "Scalar", "", "", "float"], "Input Strides": [[1], [], [], [], [1]], "Input Dims": [[8192], [], [], [], [8192]], "Ev Idx": 8019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367611256.954, "dur": 5.353, + "args": { + "External id": 292488,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367611260.266, "dur": 1.004, + "args": { + "External id": 292489,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "8192"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367611334.236, "dur": 67.984, + "args": { + "External id": 292490,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 8022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367611407.687, "dur": 6.878, + "args": { + "External id": 292491,"Record function id": 0, "Concrete Inputs": ["", "0", "8192", "16384", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367611410.951, "dur": 2.276, + "args": { + "External id": 292492,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "16777216"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367611416.311, "dur": 26.415, + "args": { + "External id": 292493,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 8025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5333367611448.054, "dur": 8.297, + "args": { + "External id": 292494,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5333367611452.134, "dur": 3.459, + "args": { + "External id": 292495,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367611454.177, "dur": 1.096, + "args": { + "External id": 292496,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5333367611459.784, "dur": 41.431, + "args": { + "External id": 292497,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367611461.541, "dur": 39.040, + "args": { + "External id": 292498,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5333367611505.472, "dur": 14.526, + "args": { + "External id": 292499,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 8031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367611525.664, "dur": 6.519, + "args": { + "External id": 292500,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367611530.175, "dur": 0.793, + "args": { + "External id": 292501,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070552, "tid": 2070552, + "ts": 5333367611536.380, "dur": 45.687, + "args": { + "External id": 292502,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 8034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5333367611537.322, "dur": 3.509, + "args": { + "External id": 292503,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 8035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5333367611538.050, "dur": 2.226, + "args": { + "External id": 292504,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 8036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367611539.343, "dur": 0.795, + "args": { + "External id": 292505,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 8037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5333367611541.528, "dur": 40.204, + "args": { + "External id": 292506,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367611542.150, "dur": 38.946, + "args": { + "External id": 292507,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367611585.949, "dur": 10.068, + "args": { + "External id": 292508,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["c10::Half", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], [], [], []], "Input Dims": [[65536, 32000], [], [], [], []], "Ev Idx": 8040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367611594.414, "dur": 0.676, + "args": { + "External id": 292509,"Record function id": 0, "Concrete Inputs": ["", "[8192, 32000]", "[32000, 1]", "524288000"], "Input type": ["c10::Half", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[65536, 32000], [], [], []], "Ev Idx": 8041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367611601.352, "dur": 1.693, + "args": { + "External id": 292510,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367611609.812, "dur": 5.673, + "args": { + "External id": 292511,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367611611.538, "dur": 3.668, + "args": { + "External id": 292512,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367611746.197, "dur": 182.556, + "args": { + "External id": 292513,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367611750.249, "dur": 3.013, + "args": { + "External id": 292514,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367611755.054, "dur": 173.292, + "args": { + "External id": 292515,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5333367611756.522, "dur": 0.407, + "args": { + "External id": 292516,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5333367611757.945, "dur": 22.458, + "args": { + "External id": 292517,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5333367611782.167, "dur": 5.428, + "args": { + "External id": 292518,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367611786.021, "dur": 1.319, + "args": { + "External id": 292519,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367611788.562, "dur": 24.688, + "args": { + "External id": 292520,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367611789.950, "dur": 1.501, + "args": { + "External id": 292521,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367611792.426, "dur": 20.581, + "args": { + "External id": 292522,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367611797.294, "dur": 2.638, + "args": { + "External id": 292523,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5333367611814.643, "dur": 21.167, + "args": { + "External id": 292524,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5333367611837.637, "dur": 14.751, + "args": { + "External id": 292525,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5333367611855.515, "dur": 13.540, + "args": { + "External id": 292526,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5333367611870.566, "dur": 11.088, + "args": { + "External id": 292527,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5333367611883.395, "dur": 17.836, + "args": { + "External id": 292528,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367611885.327, "dur": 1.089, + "args": { + "External id": 292529,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367611888.298, "dur": 0.523, + "args": { + "External id": 292530,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5333367611904.999, "dur": 10.437, + "args": { + "External id": 292531,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5333367611916.422, "dur": 10.692, + "args": { + "External id": 292532,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367611935.533, "dur": 1.900, + "args": { + "External id": 292533,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367611946.238, "dur": 1.348, + "args": { + "External id": 292534,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::Half", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367611953.417, "dur": 6.194, + "args": { + "External id": 292535,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::Half", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367611957.035, "dur": 2.271, + "args": { + "External id": 292536,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367612028.336, "dur": 158.820, + "args": { + "External id": 292537,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367612029.824, "dur": 1.715, + "args": { + "External id": 292538,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367612032.830, "dur": 153.935, + "args": { + "External id": 292539,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5333367612034.120, "dur": 0.150, + "args": { + "External id": 292540,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5333367612034.868, "dur": 16.033, + "args": { + "External id": 292541,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5333367612052.359, "dur": 2.567, + "args": { + "External id": 292542,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367612053.960, "dur": 0.782, + "args": { + "External id": 292543,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367612055.611, "dur": 20.252, + "args": { + "External id": 292544,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367612056.835, "dur": 1.353, + "args": { + "External id": 292545,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367612061.082, "dur": 14.530, + "args": { + "External id": 292546,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367612064.275, "dur": 1.537, + "args": { + "External id": 292547,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5333367612076.941, "dur": 14.060, + "args": { + "External id": 292548,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5333367612092.213, "dur": 10.695, + "args": { + "External id": 292549,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5333367612104.959, "dur": 11.363, + "args": { + "External id": 292550,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5333367612117.150, "dur": 10.533, + "args": { + "External id": 292551,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5333367612129.134, "dur": 17.455, + "args": { + "External id": 292552,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367612130.569, "dur": 0.979, + "args": { + "External id": 292553,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367612134.848, "dur": 1.058, + "args": { + "External id": 292554,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5333367612147.545, "dur": 10.821, + "args": { + "External id": 292555,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5333367612159.120, "dur": 25.963, + "args": { + "External id": 292556,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367612194.092, "dur": 1.790, + "args": { + "External id": 292557,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070552, "tid": 2070552, + "ts": 5333367612203.862, "dur": 28.081, + "args": { + "External id": 292558,"Record function id": 0, "Concrete Inputs": ["", "inf", "", ""], "Input type": ["float", "Scalar", "", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367612206.752, "dur": 6.896, + "args": { + "External id": 292559,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["float", "", "", "", "", ""], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[8192], [], [], [], [], []], "Ev Idx": 8091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367612209.092, "dur": 4.133, + "args": { + "External id": 292560,"Record function id": 0, "Concrete Inputs": ["[8192]", "[1]", "6", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070552, "tid": 2070552, + "ts": 5333367612215.378, "dur": 15.822, + "args": { + "External id": 292561,"Record function id": 0, "Concrete Inputs": ["", "inf", "", "", ""], "Input type": ["float", "Scalar", "", "", "float"], "Input Strides": [[1], [], [], [], [1]], "Input Dims": [[8192], [], [], [], [8192]], "Ev Idx": 8093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367612239.083, "dur": 4.920, + "args": { + "External id": 292562,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367612241.853, "dur": 1.122, + "args": { + "External id": 292563,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "16384"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367612311.054, "dur": 61.424, + "args": { + "External id": 292564,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 8096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367612377.639, "dur": 6.845, + "args": { + "External id": 292565,"Record function id": 0, "Concrete Inputs": ["", "0", "16384", "24576", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367612382.402, "dur": 1.027, + "args": { + "External id": 292566,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "33554432"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367612386.134, "dur": 23.374, + "args": { + "External id": 292567,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 8099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5333367612414.312, "dur": 6.347, + "args": { + "External id": 292568,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5333367612416.278, "dur": 3.674, + "args": { + "External id": 292569,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367612418.505, "dur": 1.254, + "args": { + "External id": 292570,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5333367612423.856, "dur": 43.552, + "args": { + "External id": 292571,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367612427.446, "dur": 39.340, + "args": { + "External id": 292572,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5333367612471.595, "dur": 13.657, + "args": { + "External id": 292573,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 8105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367612491.111, "dur": 5.044, + "args": { + "External id": 292574,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367612494.187, "dur": 0.733, + "args": { + "External id": 292575,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070552, "tid": 2070552, + "ts": 5333367612500.535, "dur": 49.948, + "args": { + "External id": 292576,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 8108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5333367612501.340, "dur": 6.718, + "args": { + "External id": 292577,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 8109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5333367612502.359, "dur": 5.132, + "args": { + "External id": 292578,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 8110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367612506.461, "dur": 0.863, + "args": { + "External id": 292579,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 8111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5333367612509.092, "dur": 41.116, + "args": { + "External id": 292580,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367612509.904, "dur": 39.779, + "args": { + "External id": 292581,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367612554.008, "dur": 3.985, + "args": { + "External id": 292582,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["c10::Half", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], [], [], []], "Input Dims": [[65536, 32000], [], [], [], []], "Ev Idx": 8114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367612556.417, "dur": 0.726, + "args": { + "External id": 292583,"Record function id": 0, "Concrete Inputs": ["", "[8192, 32000]", "[32000, 1]", "786432000"], "Input type": ["c10::Half", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[65536, 32000], [], [], []], "Ev Idx": 8115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367612565.650, "dur": 1.796, + "args": { + "External id": 292584,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367612573.983, "dur": 5.293, + "args": { + "External id": 292585,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367612575.515, "dur": 3.492, + "args": { + "External id": 292586,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367612705.360, "dur": 178.812, + "args": { + "External id": 292587,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367612707.932, "dur": 3.765, + "args": { + "External id": 292588,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367612715.505, "dur": 168.256, + "args": { + "External id": 292589,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5333367612719.313, "dur": 0.447, + "args": { + "External id": 292590,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5333367612720.937, "dur": 22.107, + "args": { + "External id": 292591,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5333367612744.806, "dur": 5.196, + "args": { + "External id": 292592,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367612746.802, "dur": 2.628, + "args": { + "External id": 292593,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367612751.035, "dur": 20.505, + "args": { + "External id": 292594,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367612752.290, "dur": 1.189, + "args": { + "External id": 292595,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367612754.578, "dur": 16.670, + "args": { + "External id": 292596,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367612757.505, "dur": 2.886, + "args": { + "External id": 292597,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5333367612773.110, "dur": 19.442, + "args": { + "External id": 292598,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5333367612793.758, "dur": 12.712, + "args": { + "External id": 292599,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5333367612811.433, "dur": 12.099, + "args": { + "External id": 292600,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5333367612825.160, "dur": 11.036, + "args": { + "External id": 292601,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5333367612838.257, "dur": 18.966, + "args": { + "External id": 292602,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367612840.032, "dur": 0.925, + "args": { + "External id": 292603,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367612843.164, "dur": 1.211, + "args": { + "External id": 292604,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5333367612858.751, "dur": 10.940, + "args": { + "External id": 292605,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5333367612872.653, "dur": 9.848, + "args": { + "External id": 292606,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367612891.227, "dur": 1.785, + "args": { + "External id": 292607,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367612902.470, "dur": 1.427, + "args": { + "External id": 292608,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::Half", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367612909.382, "dur": 4.176, + "args": { + "External id": 292609,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::Half", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367612910.856, "dur": 2.461, + "args": { + "External id": 292610,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367612986.898, "dur": 155.092, + "args": { + "External id": 292611,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367612990.768, "dur": 1.510, + "args": { + "External id": 292612,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367612993.589, "dur": 148.069, + "args": { + "External id": 292613,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5333367612994.754, "dur": 0.181, + "args": { + "External id": 292614,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5333367612996.087, "dur": 15.157, + "args": { + "External id": 292615,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5333367613012.892, "dur": 7.638, + "args": { + "External id": 292616,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367613019.438, "dur": 0.647, + "args": { + "External id": 292617,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367613021.392, "dur": 18.706, + "args": { + "External id": 292618,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367613024.295, "dur": 1.267, + "args": { + "External id": 292619,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367613026.721, "dur": 13.136, + "args": { + "External id": 292620,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367613028.922, "dur": 1.781, + "args": { + "External id": 292621,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5333367613043.463, "dur": 15.627, + "args": { + "External id": 292622,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5333367613060.201, "dur": 10.620, + "args": { + "External id": 292623,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5333367613072.920, "dur": 11.250, + "args": { + "External id": 292624,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5333367613085.046, "dur": 10.964, + "args": { + "External id": 292625,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5333367613097.274, "dur": 19.756, + "args": { + "External id": 292626,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367613100.428, "dur": 0.897, + "args": { + "External id": 292627,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367613105.311, "dur": 0.612, + "args": { + "External id": 292628,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5333367613118.121, "dur": 10.313, + "args": { + "External id": 292629,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5333367613129.365, "dur": 11.195, + "args": { + "External id": 292630,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367613146.695, "dur": 1.302, + "args": { + "External id": 292631,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070552, "tid": 2070552, + "ts": 5333367613156.364, "dur": 42.456, + "args": { + "External id": 292632,"Record function id": 0, "Concrete Inputs": ["", "inf", "", ""], "Input type": ["float", "Scalar", "", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367613158.539, "dur": 6.456, + "args": { + "External id": 292633,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["float", "", "", "", "", ""], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[8192], [], [], [], [], []], "Ev Idx": 8165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367613160.924, "dur": 3.579, + "args": { + "External id": 292634,"Record function id": 0, "Concrete Inputs": ["[8192]", "[1]", "6", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070552, "tid": 2070552, + "ts": 5333367613182.409, "dur": 15.525, + "args": { + "External id": 292635,"Record function id": 0, "Concrete Inputs": ["", "inf", "", "", ""], "Input type": ["float", "Scalar", "", "", "float"], "Input Strides": [[1], [], [], [], [1]], "Input Dims": [[8192], [], [], [], [8192]], "Ev Idx": 8167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367613206.961, "dur": 5.861, + "args": { + "External id": 292636,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367613210.618, "dur": 0.912, + "args": { + "External id": 292637,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "24576"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367613280.617, "dur": 66.035, + "args": { + "External id": 292638,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 8170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367613351.843, "dur": 7.240, + "args": { + "External id": 292639,"Record function id": 0, "Concrete Inputs": ["", "0", "24576", "32768", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367613356.723, "dur": 1.100, + "args": { + "External id": 292640,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "50331648"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367613360.852, "dur": 25.412, + "args": { + "External id": 292641,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 8173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5333367613390.892, "dur": 5.478, + "args": { + "External id": 292642,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5333367613392.546, "dur": 3.168, + "args": { + "External id": 292643,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367613394.654, "dur": 0.872, + "args": { + "External id": 292644,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5333367613399.829, "dur": 41.770, + "args": { + "External id": 292645,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367613403.198, "dur": 37.774, + "args": { + "External id": 292646,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5333367613446.301, "dur": 13.879, + "args": { + "External id": 292647,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 8179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367613465.737, "dur": 4.272, + "args": { + "External id": 292648,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367613467.978, "dur": 0.991, + "args": { + "External id": 292649,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070552, "tid": 2070552, + "ts": 5333367613474.231, "dur": 50.752, + "args": { + "External id": 292650,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 8182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5333367613475.191, "dur": 7.444, + "args": { + "External id": 292651,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 8183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5333367613475.945, "dur": 6.098, + "args": { + "External id": 292652,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 8184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367613479.089, "dur": 2.785, + "args": { + "External id": 292653,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 8185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5333367613483.593, "dur": 41.033, + "args": { + "External id": 292654,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367613484.220, "dur": 39.811, + "args": { + "External id": 292655,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367613528.944, "dur": 3.145, + "args": { + "External id": 292656,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["c10::Half", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], [], [], []], "Input Dims": [[65536, 32000], [], [], [], []], "Ev Idx": 8188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367613530.648, "dur": 0.662, + "args": { + "External id": 292657,"Record function id": 0, "Concrete Inputs": ["", "[8192, 32000]", "[32000, 1]", "1048576000"], "Input type": ["c10::Half", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[65536, 32000], [], [], []], "Ev Idx": 8189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367613537.803, "dur": 1.697, + "args": { + "External id": 292658,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367613547.605, "dur": 6.227, + "args": { + "External id": 292659,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367613550.199, "dur": 3.378, + "args": { + "External id": 292660,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367613668.575, "dur": 178.900, + "args": { + "External id": 292661,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367613672.219, "dur": 3.397, + "args": { + "External id": 292662,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367613677.827, "dur": 169.319, + "args": { + "External id": 292663,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5333367613679.482, "dur": 0.620, + "args": { + "External id": 292664,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5333367613683.402, "dur": 26.731, + "args": { + "External id": 292665,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5333367613711.792, "dur": 3.738, + "args": { + "External id": 292666,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367613713.938, "dur": 1.271, + "args": { + "External id": 292667,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367613716.440, "dur": 21.697, + "args": { + "External id": 292668,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367613717.336, "dur": 1.466, + "args": { + "External id": 292669,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367613720.115, "dur": 17.717, + "args": { + "External id": 292670,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367613725.467, "dur": 2.005, + "args": { + "External id": 292671,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5333367613739.871, "dur": 19.500, + "args": { + "External id": 292672,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5333367613760.982, "dur": 12.162, + "args": { + "External id": 292673,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5333367613775.757, "dur": 11.984, + "args": { + "External id": 292674,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5333367613789.192, "dur": 11.001, + "args": { + "External id": 292675,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5333367613804.322, "dur": 18.499, + "args": { + "External id": 292676,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367613805.973, "dur": 1.238, + "args": { + "External id": 292677,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367613809.783, "dur": 0.723, + "args": { + "External id": 292678,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5333367613824.079, "dur": 10.480, + "args": { + "External id": 292679,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5333367613835.644, "dur": 10.357, + "args": { + "External id": 292680,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367613854.339, "dur": 1.982, + "args": { + "External id": 292681,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367613868.175, "dur": 1.300, + "args": { + "External id": 292682,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::Half", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367613874.994, "dur": 4.275, + "args": { + "External id": 292683,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::Half", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367613876.682, "dur": 2.340, + "args": { + "External id": 292684,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367613953.024, "dur": 144.434, + "args": { + "External id": 292685,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367613954.900, "dur": 1.738, + "args": { + "External id": 292686,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367613957.568, "dur": 139.619, + "args": { + "External id": 292687,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5333367613963.348, "dur": 0.159, + "args": { + "External id": 292688,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5333367613964.787, "dur": 14.451, + "args": { + "External id": 292689,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5333367613980.605, "dur": 3.053, + "args": { + "External id": 292690,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367613982.562, "dur": 0.809, + "args": { + "External id": 292691,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367613984.375, "dur": 18.126, + "args": { + "External id": 292692,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367613987.363, "dur": 1.248, + "args": { + "External id": 292693,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367613989.499, "dur": 12.779, + "args": { + "External id": 292694,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367613990.921, "dur": 1.617, + "args": { + "External id": 292695,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5333367614003.407, "dur": 14.068, + "args": { + "External id": 292696,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5333367614018.572, "dur": 10.403, + "args": { + "External id": 292697,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5333367614032.912, "dur": 11.024, + "args": { + "External id": 292698,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5333367614044.820, "dur": 10.332, + "args": { + "External id": 292699,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5333367614056.507, "dur": 16.469, + "args": { + "External id": 292700,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367614057.668, "dur": 0.785, + "args": { + "External id": 292701,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367614060.350, "dur": 0.865, + "args": { + "External id": 292702,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5333367614074.049, "dur": 10.299, + "args": { + "External id": 292703,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5333367614087.188, "dur": 9.096, + "args": { + "External id": 292704,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367614101.766, "dur": 1.241, + "args": { + "External id": 292705,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070552, "tid": 2070552, + "ts": 5333367614110.791, "dur": 24.301, + "args": { + "External id": 292706,"Record function id": 0, "Concrete Inputs": ["", "inf", "", ""], "Input type": ["float", "Scalar", "", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367614112.985, "dur": 8.308, + "args": { + "External id": 292707,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["float", "", "", "", "", ""], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[8192], [], [], [], [], []], "Ev Idx": 8239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367614115.628, "dur": 5.222, + "args": { + "External id": 292708,"Record function id": 0, "Concrete Inputs": ["[8192]", "[1]", "6", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070552, "tid": 2070552, + "ts": 5333367614122.919, "dur": 11.533, + "args": { + "External id": 292709,"Record function id": 0, "Concrete Inputs": ["", "inf", "", "", ""], "Input type": ["float", "Scalar", "", "", "float"], "Input Strides": [[1], [], [], [], [1]], "Input Dims": [[8192], [], [], [], [8192]], "Ev Idx": 8241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367614141.478, "dur": 4.536, + "args": { + "External id": 292710,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367614144.405, "dur": 0.749, + "args": { + "External id": 292711,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "32768"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367614230.076, "dur": 64.148, + "args": { + "External id": 292712,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 8244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367614300.213, "dur": 5.158, + "args": { + "External id": 292713,"Record function id": 0, "Concrete Inputs": ["", "0", "32768", "40960", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367614303.188, "dur": 0.864, + "args": { + "External id": 292714,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "67108864"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367614307.157, "dur": 24.630, + "args": { + "External id": 292715,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 8247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5333367614337.014, "dur": 8.568, + "args": { + "External id": 292716,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5333367614341.210, "dur": 3.524, + "args": { + "External id": 292717,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367614343.532, "dur": 1.043, + "args": { + "External id": 292718,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5333367614349.062, "dur": 40.383, + "args": { + "External id": 292719,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367614350.413, "dur": 38.433, + "args": { + "External id": 292720,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5333367614393.628, "dur": 14.004, + "args": { + "External id": 292721,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 8253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367614413.181, "dur": 6.081, + "args": { + "External id": 292722,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367614417.357, "dur": 0.717, + "args": { + "External id": 292723,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070552, "tid": 2070552, + "ts": 5333367614423.565, "dur": 47.211, + "args": { + "External id": 292724,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 8256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5333367614424.537, "dur": 4.284, + "args": { + "External id": 292725,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 8257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5333367614425.434, "dur": 2.851, + "args": { + "External id": 292726,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 8258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367614427.551, "dur": 0.605, + "args": { + "External id": 292727,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 8259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5333367614429.567, "dur": 40.836, + "args": { + "External id": 292728,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367614430.638, "dur": 39.249, + "args": { + "External id": 292729,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367614476.516, "dur": 3.448, + "args": { + "External id": 292730,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["c10::Half", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], [], [], []], "Input Dims": [[65536, 32000], [], [], [], []], "Ev Idx": 8262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367614478.281, "dur": 0.954, + "args": { + "External id": 292731,"Record function id": 0, "Concrete Inputs": ["", "[8192, 32000]", "[32000, 1]", "1310720000"], "Input type": ["c10::Half", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[65536, 32000], [], [], []], "Ev Idx": 8263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367614485.760, "dur": 2.157, + "args": { + "External id": 292732,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367614494.066, "dur": 7.889, + "args": { + "External id": 292733,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367614498.163, "dur": 3.468, + "args": { + "External id": 292734,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367614578.023, "dur": 216.278, + "args": { + "External id": 292735,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367614582.356, "dur": 2.266, + "args": { + "External id": 292736,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367614586.156, "dur": 207.794, + "args": { + "External id": 292737,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5333367614587.348, "dur": 0.405, + "args": { + "External id": 292738,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5333367614589.190, "dur": 20.254, + "args": { + "External id": 292739,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5333367614611.167, "dur": 5.819, + "args": { + "External id": 292740,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367614615.587, "dur": 1.106, + "args": { + "External id": 292741,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367614620.088, "dur": 63.062, + "args": { + "External id": 292742,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367614620.921, "dur": 38.121, + "args": { + "External id": 292743,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367614662.492, "dur": 20.419, + "args": { + "External id": 292744,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367614665.917, "dur": 3.954, + "args": { + "External id": 292745,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5333367614684.689, "dur": 18.516, + "args": { + "External id": 292746,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5333367614704.924, "dur": 13.823, + "args": { + "External id": 292747,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5333367614721.500, "dur": 12.556, + "args": { + "External id": 292748,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5333367614735.366, "dur": 11.347, + "args": { + "External id": 292749,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5333367614748.318, "dur": 21.092, + "args": { + "External id": 292750,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367614752.369, "dur": 0.935, + "args": { + "External id": 292751,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367614755.251, "dur": 1.227, + "args": { + "External id": 292752,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5333367614770.759, "dur": 10.734, + "args": { + "External id": 292753,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5333367614782.349, "dur": 10.499, + "args": { + "External id": 292754,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367614801.957, "dur": 2.248, + "args": { + "External id": 292755,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367614816.020, "dur": 1.233, + "args": { + "External id": 292756,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::Half", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367614823.125, "dur": 4.001, + "args": { + "External id": 292757,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::Half", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367614824.461, "dur": 2.393, + "args": { + "External id": 292758,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367614903.008, "dur": 156.890, + "args": { + "External id": 292759,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367614904.490, "dur": 2.879, + "args": { + "External id": 292760,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367614908.411, "dur": 151.199, + "args": { + "External id": 292761,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5333367614911.841, "dur": 0.329, + "args": { + "External id": 292762,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5333367614913.169, "dur": 17.150, + "args": { + "External id": 292763,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5333367614932.267, "dur": 3.713, + "args": { + "External id": 292764,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367614934.504, "dur": 1.253, + "args": { + "External id": 292765,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367614936.850, "dur": 18.038, + "args": { + "External id": 292766,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367614937.686, "dur": 1.339, + "args": { + "External id": 292767,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367614940.271, "dur": 14.333, + "args": { + "External id": 292768,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367614943.572, "dur": 1.268, + "args": { + "External id": 292769,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5333367614956.161, "dur": 14.304, + "args": { + "External id": 292770,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5333367614971.470, "dur": 10.913, + "args": { + "External id": 292771,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5333367614986.611, "dur": 10.631, + "args": { + "External id": 292772,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5333367614998.061, "dur": 13.497, + "args": { + "External id": 292773,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5333367615013.001, "dur": 20.837, + "args": { + "External id": 292774,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367615014.343, "dur": 1.110, + "args": { + "External id": 292775,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367615017.838, "dur": 0.549, + "args": { + "External id": 292776,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5333367615034.998, "dur": 11.445, + "args": { + "External id": 292777,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5333367615049.441, "dur": 9.020, + "args": { + "External id": 292778,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367615064.407, "dur": 1.323, + "args": { + "External id": 292779,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070552, "tid": 2070552, + "ts": 5333367615073.584, "dur": 25.271, + "args": { + "External id": 292780,"Record function id": 0, "Concrete Inputs": ["", "inf", "", ""], "Input type": ["float", "Scalar", "", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367615075.806, "dur": 6.737, + "args": { + "External id": 292781,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["float", "", "", "", "", ""], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[8192], [], [], [], [], []], "Ev Idx": 8313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367615078.334, "dur": 3.789, + "args": { + "External id": 292782,"Record function id": 0, "Concrete Inputs": ["[8192]", "[1]", "6", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070552, "tid": 2070552, + "ts": 5333367615084.130, "dur": 14.011, + "args": { + "External id": 292783,"Record function id": 0, "Concrete Inputs": ["", "inf", "", "", ""], "Input type": ["float", "Scalar", "", "", "float"], "Input Strides": [[1], [], [], [], [1]], "Input Dims": [[8192], [], [], [], [8192]], "Ev Idx": 8315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367615105.833, "dur": 4.516, + "args": { + "External id": 292784,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367615108.497, "dur": 0.849, + "args": { + "External id": 292785,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "40960"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367615186.749, "dur": 70.699, + "args": { + "External id": 292786,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 8318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367615263.401, "dur": 6.082, + "args": { + "External id": 292787,"Record function id": 0, "Concrete Inputs": ["", "0", "40960", "49152", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367615267.070, "dur": 0.796, + "args": { + "External id": 292788,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "83886080"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367615271.120, "dur": 26.635, + "args": { + "External id": 292789,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 8321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5333367615303.109, "dur": 7.951, + "args": { + "External id": 292790,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5333367615306.947, "dur": 3.400, + "args": { + "External id": 292791,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367615309.098, "dur": 1.023, + "args": { + "External id": 292792,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5333367615314.284, "dur": 67.766, + "args": { + "External id": 292793,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367615315.332, "dur": 66.076, + "args": { + "External id": 292794,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5333367615387.693, "dur": 34.348, + "args": { + "External id": 292795,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 8327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367615428.973, "dur": 6.274, + "args": { + "External id": 292796,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367615433.285, "dur": 0.920, + "args": { + "External id": 292797,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070552, "tid": 2070552, + "ts": 5333367615439.817, "dur": 71.515, + "args": { + "External id": 292798,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 8330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5333367615440.834, "dur": 4.079, + "args": { + "External id": 292799,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 8331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5333367615441.426, "dur": 2.944, + "args": { + "External id": 292800,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 8332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367615443.435, "dur": 0.797, + "args": { + "External id": 292801,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 8333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5333367615445.833, "dur": 65.185, + "args": { + "External id": 292802,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367615446.341, "dur": 64.073, + "args": { + "External id": 292803,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367615515.485, "dur": 5.306, + "args": { + "External id": 292804,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["c10::Half", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], [], [], []], "Input Dims": [[65536, 32000], [], [], [], []], "Ev Idx": 8336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367615519.380, "dur": 0.573, + "args": { + "External id": 292805,"Record function id": 0, "Concrete Inputs": ["", "[8192, 32000]", "[32000, 1]", "1572864000"], "Input type": ["c10::Half", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[65536, 32000], [], [], []], "Ev Idx": 8337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367615527.267, "dur": 2.226, + "args": { + "External id": 292806,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367615536.352, "dur": 6.323, + "args": { + "External id": 292807,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367615538.406, "dur": 3.954, + "args": { + "External id": 292808,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367615663.377, "dur": 174.452, + "args": { + "External id": 292809,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367615667.265, "dur": 3.355, + "args": { + "External id": 292810,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367615673.944, "dur": 163.443, + "args": { + "External id": 292811,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5333367615675.260, "dur": 0.435, + "args": { + "External id": 292812,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5333367615677.115, "dur": 23.489, + "args": { + "External id": 292813,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5333367615702.260, "dur": 4.919, + "args": { + "External id": 292814,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367615704.484, "dur": 2.339, + "args": { + "External id": 292815,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367615708.275, "dur": 21.638, + "args": { + "External id": 292816,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367615709.195, "dur": 1.354, + "args": { + "External id": 292817,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367615713.786, "dur": 15.910, + "args": { + "External id": 292818,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367615716.596, "dur": 3.106, + "args": { + "External id": 292819,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5333367615731.234, "dur": 17.695, + "args": { + "External id": 292820,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5333367615750.274, "dur": 12.878, + "args": { + "External id": 292821,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5333367615766.297, "dur": 12.347, + "args": { + "External id": 292822,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5333367615779.789, "dur": 10.486, + "args": { + "External id": 292823,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5333367615792.000, "dur": 19.991, + "args": { + "External id": 292824,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367615793.968, "dur": 1.230, + "args": { + "External id": 292825,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367615799.086, "dur": 0.602, + "args": { + "External id": 292826,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5333367615813.529, "dur": 10.749, + "args": { + "External id": 292827,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5333367615825.395, "dur": 11.016, + "args": { + "External id": 292828,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367615844.433, "dur": 1.950, + "args": { + "External id": 292829,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367615856.386, "dur": 1.165, + "args": { + "External id": 292830,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::Half", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367615862.998, "dur": 5.333, + "args": { + "External id": 292831,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::Half", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367615866.019, "dur": 2.054, + "args": { + "External id": 292832,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367615937.401, "dur": 148.842, + "args": { + "External id": 292833,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367615940.927, "dur": 1.965, + "args": { + "External id": 292834,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367615944.488, "dur": 141.517, + "args": { + "External id": 292835,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5333367615945.510, "dur": 0.167, + "args": { + "External id": 292836,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5333367615946.430, "dur": 14.462, + "args": { + "External id": 292837,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5333367615962.427, "dur": 5.005, + "args": { + "External id": 292838,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367615966.181, "dur": 1.004, + "args": { + "External id": 292839,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367615968.338, "dur": 18.507, + "args": { + "External id": 292840,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367615971.272, "dur": 1.205, + "args": { + "External id": 292841,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367615973.383, "dur": 13.241, + "args": { + "External id": 292842,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367615974.861, "dur": 1.575, + "args": { + "External id": 292843,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5333367615987.765, "dur": 13.489, + "args": { + "External id": 292844,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5333367616002.281, "dur": 13.025, + "args": { + "External id": 292845,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5333367616017.793, "dur": 12.587, + "args": { + "External id": 292846,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5333367616031.205, "dur": 11.342, + "args": { + "External id": 292847,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5333367616044.187, "dur": 17.438, + "args": { + "External id": 292848,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367616045.738, "dur": 0.915, + "args": { + "External id": 292849,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367616050.108, "dur": 0.859, + "args": { + "External id": 292850,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5333367616062.644, "dur": 10.052, + "args": { + "External id": 292851,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5333367616073.519, "dur": 11.611, + "args": { + "External id": 292852,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367616090.568, "dur": 1.132, + "args": { + "External id": 292853,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070552, "tid": 2070552, + "ts": 5333367616099.219, "dur": 27.297, + "args": { + "External id": 292854,"Record function id": 0, "Concrete Inputs": ["", "inf", "", ""], "Input type": ["float", "Scalar", "", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367616101.582, "dur": 8.667, + "args": { + "External id": 292855,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["float", "", "", "", "", ""], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[8192], [], [], [], [], []], "Ev Idx": 8387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367616106.378, "dur": 3.455, + "args": { + "External id": 292856,"Record function id": 0, "Concrete Inputs": ["[8192]", "[1]", "6", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070552, "tid": 2070552, + "ts": 5333367616111.800, "dur": 13.972, + "args": { + "External id": 292857,"Record function id": 0, "Concrete Inputs": ["", "inf", "", "", ""], "Input type": ["float", "Scalar", "", "", "float"], "Input Strides": [[1], [], [], [], [1]], "Input Dims": [[8192], [], [], [], [8192]], "Ev Idx": 8389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367616132.869, "dur": 3.909, + "args": { + "External id": 292858,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8390 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367616135.211, "dur": 0.613, + "args": { + "External id": 292859,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "49152"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367616221.513, "dur": 65.083, + "args": { + "External id": 292860,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 8392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367616292.937, "dur": 8.539, + "args": { + "External id": 292861,"Record function id": 0, "Concrete Inputs": ["", "0", "49152", "57344", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367616298.856, "dur": 1.027, + "args": { + "External id": 292862,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "100663296"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8394 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367616303.283, "dur": 25.901, + "args": { + "External id": 292863,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 8395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5333367616333.846, "dur": 34.789, + "args": { + "External id": 292864,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5333367616363.635, "dur": 4.299, + "args": { + "External id": 292865,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367616366.737, "dur": 0.986, + "args": { + "External id": 292866,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5333367616371.710, "dur": 44.196, + "args": { + "External id": 292867,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367616375.125, "dur": 40.206, + "args": { + "External id": 292868,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5333367616420.490, "dur": 14.485, + "args": { + "External id": 292869,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 8401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367616440.642, "dur": 4.440, + "args": { + "External id": 292870,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367616443.193, "dur": 0.668, + "args": { + "External id": 292871,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linear", "pid": 2070552, "tid": 2070552, + "ts": 5333367616449.249, "dur": 50.542, + "args": { + "External id": 292872,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["c10::BFloat16", "c10::BFloat16", ""], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [32000, 2048], []], "Ev Idx": 8404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5333367616450.057, "dur": 8.522, + "args": { + "External id": 292873,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[2048, 1]], "Input Dims": [[32000, 2048]], "Ev Idx": 8405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5333367616451.094, "dur": 6.708, + "args": { + "External id": 292874,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], []], "Input Dims": [[32000, 2048], [], []], "Ev Idx": 8406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367616456.645, "dur": 1.026, + "args": { + "External id": 292875,"Record function id": 0, "Concrete Inputs": ["", "[2048, 32000]", "[1, 2048]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[32000, 2048], [], [], []], "Ev Idx": 8407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5333367616459.200, "dur": 40.145, + "args": { + "External id": 292876,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367616459.987, "dur": 38.645, + "args": { + "External id": 292877,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[2048, 1], [1, 2048]], "Input Dims": [[8192, 2048], [2048, 32000]], "Ev Idx": 8409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367616503.490, "dur": 3.344, + "args": { + "External id": 292878,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["c10::Half", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], [], [], []], "Input Dims": [[65536, 32000], [], [], [], []], "Ev Idx": 8410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367616505.268, "dur": 0.726, + "args": { + "External id": 292879,"Record function id": 0, "Concrete Inputs": ["", "[8192, 32000]", "[32000, 1]", "1835008000"], "Input type": ["c10::Half", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[65536, 32000], [], [], []], "Ev Idx": 8411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367616514.625, "dur": 2.002, + "args": { + "External id": 292880,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::BFloat16", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367616524.158, "dur": 5.983, + "args": { + "External id": 292881,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::BFloat16", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367616525.828, "dur": 3.986, + "args": { + "External id": 292882,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367616609.540, "dur": 217.534, + "args": { + "External id": 292883,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367616611.850, "dur": 1.987, + "args": { + "External id": 292884,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367616617.297, "dur": 209.326, + "args": { + "External id": 292885,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5333367616618.342, "dur": 0.548, + "args": { + "External id": 292886,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5333367616621.634, "dur": 63.715, + "args": { + "External id": 292887,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5333367616688.433, "dur": 3.273, + "args": { + "External id": 292888,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367616690.581, "dur": 0.751, + "args": { + "External id": 292889,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367616692.617, "dur": 22.878, + "args": { + "External id": 292890,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8422 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367616693.744, "dur": 1.648, + "args": { + "External id": 292891,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8423 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367616696.493, "dur": 18.662, + "args": { + "External id": 292892,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367616701.115, "dur": 2.704, + "args": { + "External id": 292893,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5333367616718.922, "dur": 19.457, + "args": { + "External id": 292894,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5333367616739.763, "dur": 11.926, + "args": { + "External id": 292895,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5333367616754.584, "dur": 12.592, + "args": { + "External id": 292896,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5333367616768.610, "dur": 11.192, + "args": { + "External id": 292897,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5333367616781.575, "dur": 18.868, + "args": { + "External id": 292898,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367616783.636, "dur": 1.190, + "args": { + "External id": 292899,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367616787.066, "dur": 0.875, + "args": { + "External id": 292900,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5333367616803.770, "dur": 10.496, + "args": { + "External id": 292901,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5333367616815.313, "dur": 10.359, + "args": { + "External id": 292902,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367616834.481, "dur": 2.257, + "args": { + "External id": 292903,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367616846.719, "dur": 1.115, + "args": { + "External id": 292904,"Record function id": 0, "Concrete Inputs": ["", "[-1, 32000]"], "Input type": ["c10::Half", "ScalarList"], "Input Strides": [[32000, 1], []], "Input Dims": [[8192, 32000], []], "Ev Idx": 8436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::new_empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367616853.133, "dur": 6.020, + "args": { + "External id": 292905,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "6", "", "", "False"], "Input type": ["c10::Half", "ScalarList", "Scalar", "", "", "Scalar"], "Input Strides": [[32000, 1], [], [], [], [], []], "Input Dims": [[8192, 32000], [], [], [], [], []], "Ev Idx": 8437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367616856.355, "dur": 2.516, + "args": { + "External id": 292906,"Record function id": 0, "Concrete Inputs": ["[8192, 1]", "6", "0", "", "False", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "Scalar", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367616929.152, "dur": 151.811, + "args": { + "External id": 292907,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367616931.234, "dur": 1.891, + "args": { + "External id": 292908,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::logsumexp", "pid": 2070552, "tid": 2070552, + "ts": 5333367616936.315, "dur": 144.323, + "args": { + "External id": 292909,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", "float"], "Input Strides": [[1, 1], [], [], [1]], "Input Dims": [[8192, 1], [], [], [0]], "Ev Idx": 8441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::real", "pid": 2070552, "tid": 2070552, + "ts": 5333367616937.671, "dur": 0.421, + "args": { + "External id": 292910,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::amax", "pid": 2070552, "tid": 2070552, + "ts": 5333367616938.886, "dur": 14.869, + "args": { + "External id": 292911,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "True"], "Input type": ["float", "ScalarList", "Scalar"], "Input Strides": [[1, 1], [], []], "Input Dims": [[8192, 1], [], []], "Ev Idx": 8443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::squeeze", "pid": 2070552, "tid": 2070552, + "ts": 5333367616955.330, "dur": 5.250, + "args": { + "External id": 292912,"Record function id": 0, "Concrete Inputs": ["", "[-1]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1, 1], []], "Input Dims": [[8192, 1], []], "Ev Idx": 8444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367616957.403, "dur": 2.974, + "args": { + "External id": 292913,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1, 1], [], [], []], "Input Dims": [[8192, 1], [], [], []], "Ev Idx": 8445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367616961.421, "dur": 21.362, + "args": { + "External id": 292914,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333367616964.524, "dur": 1.668, + "args": { + "External id": 292915,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333367616967.068, "dur": 15.463, + "args": { + "External id": 292916,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[1], [1]], "Input Dims": [[8192], [0]], "Ev Idx": 8448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367616968.881, "dur": 1.449, + "args": { + "External id": 292917,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5333367616983.675, "dur": 15.061, + "args": { + "External id": 292918,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::masked_fill_", "pid": 2070552, "tid": 2070552, + "ts": 5333367616999.880, "dur": 11.303, + "args": { + "External id": 292919,"Record function id": 0, "Concrete Inputs": ["", "", "0"], "Input type": ["float", "bool", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sub", "pid": 2070552, "tid": 2070552, + "ts": 5333367617013.394, "dur": 10.912, + "args": { + "External id": 292920,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1, 1], [1, 1], []], "Input Dims": [[8192, 1], [8192, 1], []], "Ev Idx": 8452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::exp_", "pid": 2070552, "tid": 2070552, + "ts": 5333367617025.159, "dur": 10.907, + "args": { + "External id": 292921,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1, 1]], "Input Dims": [[8192, 1]], "Ev Idx": 8453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5333367617037.171, "dur": 19.021, + "args": { + "External id": 292922,"Record function id": 0, "Concrete Inputs": ["", "[-1]", "False", "", ""], "Input type": ["float", "ScalarList", "Scalar", "", "float"], "Input Strides": [[1, 1], [], [], [], [1]], "Input Dims": [[8192, 1], [], [], [], [0]], "Ev Idx": 8454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333367617038.306, "dur": 1.003, + "args": { + "External id": 292923,"Record function id": 0, "Concrete Inputs": ["", "[8192]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 8455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367617042.887, "dur": 1.878, + "args": { + "External id": 292924,"Record function id": 0, "Concrete Inputs": ["", "[8192, 1]", "[1, 0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::log_", "pid": 2070552, "tid": 2070552, + "ts": 5333367617057.260, "dur": 10.249, + "args": { + "External id": 292925,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[8192]], "Ev Idx": 8457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5333367617068.133, "dur": 11.775, + "args": { + "External id": 292926,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[1], [1], []], "Input Dims": [[8192], [8192], []], "Ev Idx": 8458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333367617085.770, "dur": 1.214, + "args": { + "External id": 292927,"Record function id": 0, "Concrete Inputs": ["", "[8192]"], "Input type": ["float", "ScalarList"], "Input Strides": [[1], []], "Input Dims": [[8192], []], "Ev Idx": 8459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070552, "tid": 2070552, + "ts": 5333367617093.843, "dur": 22.718, + "args": { + "External id": 292928,"Record function id": 0, "Concrete Inputs": ["", "inf", "", ""], "Input type": ["float", "Scalar", "", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[8192], [], [], []], "Ev Idx": 8460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367617096.151, "dur": 5.651, + "args": { + "External id": 292929,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", ""], "Input type": ["float", "", "", "", "", ""], "Input Strides": [[1], [], [], [], [], []], "Input Dims": [[8192], [], [], [], [], []], "Ev Idx": 8461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367617098.079, "dur": 3.277, + "args": { + "External id": 292930,"Record function id": 0, "Concrete Inputs": ["[8192]", "[1]", "6", "0", "", ""], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::nan_to_num", "pid": 2070552, "tid": 2070552, + "ts": 5333367617103.439, "dur": 12.435, + "args": { + "External id": 292931,"Record function id": 0, "Concrete Inputs": ["", "inf", "", "", ""], "Input type": ["float", "Scalar", "", "", "float"], "Input Strides": [[1], [], [], [], [1]], "Input Dims": [[8192], [], [], [], [8192]], "Ev Idx": 8463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367617122.645, "dur": 4.312, + "args": { + "External id": 292932,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["float", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[1], [], [], [], []], "Input Dims": [[65536], [], [], [], []], "Ev Idx": 8464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367617125.126, "dur": 0.961, + "args": { + "External id": 292933,"Record function id": 0, "Concrete Inputs": ["", "[8192]", "[1]", "57344"], "Input type": ["float", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367617204.359, "dur": 61.087, + "args": { + "External id": 292934,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[32000, 1], [2048, 1]], "Input Dims": [[8192, 32000], [32000, 2048]], "Ev Idx": 8466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::slice", "pid": 2070552, "tid": 2070552, + "ts": 5333367617273.614, "dur": 5.806, + "args": { + "External id": 292935,"Record function id": 0, "Concrete Inputs": ["", "0", "57344", "65536", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar", "Scalar", "Scalar"], "Input Strides": [[2048, 1], [], [], [], []], "Input Dims": [[65536, 2048], [], [], [], []], "Ev Idx": 8467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367617276.852, "dur": 1.000, + "args": { + "External id": 292936,"Record function id": 0, "Concrete Inputs": ["", "[8192, 2048]", "[2048, 1]", "117440512"], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", "Scalar"], "Input Strides": [[2048, 1], [], [], []], "Input Dims": [[65536, 2048], [], [], []], "Ev Idx": 8468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333367617281.078, "dur": 23.243, + "args": { + "External id": 292937,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["c10::BFloat16", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[8192, 2048], [8192, 2048], []], "Ev Idx": 8469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::t", "pid": 2070552, "tid": 2070552, + "ts": 5333367617309.416, "dur": 6.057, + "args": { + "External id": 292938,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["c10::BFloat16"], "Input Strides": [[32000, 1]], "Input Dims": [[8192, 32000]], "Ev Idx": 8470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::transpose", "pid": 2070552, "tid": 2070552, + "ts": 5333367617311.215, "dur": 3.552, + "args": { + "External id": 292939,"Record function id": 0, "Concrete Inputs": ["", "0", "1"], "Input type": ["c10::BFloat16", "Scalar", "Scalar"], "Input Strides": [[32000, 1], [], []], "Input Dims": [[8192, 32000], [], []], "Ev Idx": 8471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367617313.396, "dur": 1.147, + "args": { + "External id": 292940,"Record function id": 0, "Concrete Inputs": ["", "[32000, 8192]", "[1, 32000]", ""], "Input type": ["c10::BFloat16", "ScalarList", "ScalarList", ""], "Input Strides": [[32000, 1], [], [], []], "Input Dims": [[8192, 32000], [], [], []], "Ev Idx": 8472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::matmul", "pid": 2070552, "tid": 2070552, + "ts": 5333367617320.551, "dur": 40.755, + "args": { + "External id": 292941,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mm", "pid": 2070552, "tid": 2070552, + "ts": 5333367617322.237, "dur": 38.527, + "args": { + "External id": 292942,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["c10::BFloat16", "c10::BFloat16"], "Input Strides": [[1, 32000], [2048, 1]], "Input Dims": [[32000, 8192], [8192, 2048]], "Ev Idx": 8474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add_", "pid": 2070552, "tid": 2070552, + "ts": 5333367617365.618, "dur": 15.020, + "args": { + "External id": 292943,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "c10::BFloat16", "Scalar"], "Input Strides": [[2048, 1], [2048, 1], []], "Input Dims": [[32000, 2048], [32000, 2048], []], "Ev Idx": 8475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5333367617385.387, "dur": 30.083, + "args": { + "External id": 292944,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[1], []], "Input Dims": [[65536], []], "Ev Idx": 8476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::sum", "pid": 2070552, "tid": 2070552, + "ts": 5333367617387.921, "dur": 27.155, + "args": { + "External id": 292945,"Record function id": 0, "Concrete Inputs": ["", "[]", "False", ""], "Input type": ["float", "ScalarList", "Scalar", ""], "Input Strides": [[1], [], [], []], "Input Dims": [[65536], [], [], []], "Ev Idx": 8477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367617395.216, "dur": 0.667, + "args": { + "External id": 292946,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2070552, "tid": 2070552, + "ts": 5333367617423.697, "dur": 17.877, + "args": { + "External id": 292947,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2070552, "tid": 2070552, + "ts": 5333367617488.574, "dur": 18.911, + "args": { + "External id": 292948,"Sequence number": 1209230, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 8480 + } + }, + { + "ph": "s", "id": 2, "pid": 2070552, "tid": 2070552, "ts": 5333367617488.574, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "user_annotation", "name": "FSDP::post_forward", "pid": 2070552, "tid": 2070552, + "ts": 5333367617613.124, "dur": 84.327, + "args": { + "External id": 292949,"Record function id": 0, "Ev Idx": 8481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2070552, "tid": 2070552, + "ts": 5333367617796.693, "dur": 35.179, + "args": { + "External id": 292950,"Sequence number": 1209231, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8482 + } + }, + { + "ph": "s", "id": 1, "pid": 2070552, "tid": 2070552, "ts": 5333367617796.693, + "cat": "fwdbwd", "name": "fwdbwd" + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ones_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367617875.655, "dur": 27.962, + "args": { + "External id": 292951,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "1"], "Input type": ["float", "", "", "", "Scalar", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333367617877.443, "dur": 8.948, + "args": { + "External id": 292952,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "False", "1"], "Input type": ["float", "", "", "", "Scalar", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333367617881.354, "dur": 4.419, + "args": { + "External id": 292953,"Record function id": 0, "Concrete Inputs": ["[]", "[]", "6", "0", "", "False"], "Input type": ["ScalarList", "ScalarList", "Scalar", "Scalar", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2070552, "tid": 2070552, + "ts": 5333367617888.486, "dur": 14.784, + "args": { + "External id": 292954,"Record function id": 0, "Concrete Inputs": ["", "1."], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2070552, "tid": 2070552, + "ts": 5333368983934.156, "dur": 54.638, + "args": { + "External id": 292955,"Sequence number": 1209232, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::div", "pid": 2070552, "tid": 2070552, + "ts": 5333368983999.271, "dur": 16.208, + "args": { + "External id": 292956,"Sequence number": 1209233, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "long int"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2070552, "tid": 2070552, + "ts": 5333368984022.908, "dur": 134.807, + "args": { + "External id": 292957,"Sequence number": 1209234, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "long int", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 8489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2070552, "tid": 2070552, + "ts": 5333368984477.346, "dur": 22.941, + "args": { + "External id": 292958,"Sequence number": 1209235, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "long int", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 8490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2070552, "tid": 2070552, + "ts": 5333368984506.498, "dur": 11.859, + "args": { + "External id": 292959,"Sequence number": 1209236, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "long int", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 8491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_norm", "pid": 2070552, "tid": 2070552, + "ts": 5333368986331.985, "dur": 3008.102, + "args": { + "External id": 292960,"Record function id": 0, "Concrete Inputs": ["", "2.", ""], "Input type": ["TensorList", "Scalar", ""], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 8492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_norm", "pid": 2070552, "tid": 2070552, + "ts": 5333368986971.912, "dur": 915.385, + "args": { + "External id": 292961,"Record function id": 0, "Concrete Inputs": ["", "2.", ""], "Input type": ["TensorList", "Scalar", ""], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 8493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zeros", "pid": 2070552, "tid": 2070552, + "ts": 5333368986990.617, "dur": 66.604, + "args": { + "External id": 292962,"Record function id": 0, "Concrete Inputs": ["[36500]", "6", "0", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 8494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333368986994.274, "dur": 12.347, + "args": { + "External id": 292963,"Record function id": 0, "Concrete Inputs": ["[36500]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 8495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::zero_", "pid": 2070552, "tid": 2070552, + "ts": 5333368987009.735, "dur": 47.117, + "args": { + "External id": 292964,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[1]], "Input Dims": [[36500]], "Ev Idx": 8496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::fill_", "pid": 2070552, "tid": 2070552, + "ts": 5333368987011.736, "dur": 44.429, + "args": { + "External id": 292965,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[1], []], "Input Dims": [[36500], []], "Ev Idx": 8497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989370.240, "dur": 3.392, + "args": { + "External id": 292966,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989375.744, "dur": 0.449, + "args": { + "External id": 292967,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989377.541, "dur": 0.364, + "args": { + "External id": 292968,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989378.983, "dur": 0.328, + "args": { + "External id": 292969,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989380.704, "dur": 0.346, + "args": { + "External id": 292970,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989382.405, "dur": 0.251, + "args": { + "External id": 292971,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989384.155, "dur": 0.229, + "args": { + "External id": 292972,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989387.385, "dur": 0.213, + "args": { + "External id": 292973,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989388.405, "dur": 0.483, + "args": { + "External id": 292974,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989389.832, "dur": 0.325, + "args": { + "External id": 292975,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989391.010, "dur": 0.320, + "args": { + "External id": 292976,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989392.365, "dur": 0.374, + "args": { + "External id": 292977,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989393.515, "dur": 0.356, + "args": { + "External id": 292978,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989394.683, "dur": 0.215, + "args": { + "External id": 292979,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989395.811, "dur": 0.245, + "args": { + "External id": 292980,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989398.618, "dur": 0.213, + "args": { + "External id": 292981,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989399.590, "dur": 0.268, + "args": { + "External id": 292982,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989400.735, "dur": 0.246, + "args": { + "External id": 292983,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989401.745, "dur": 0.222, + "args": { + "External id": 292984,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989402.736, "dur": 0.216, + "args": { + "External id": 292985,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989403.751, "dur": 0.252, + "args": { + "External id": 292986,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989405.042, "dur": 0.215, + "args": { + "External id": 292987,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989406.025, "dur": 0.219, + "args": { + "External id": 292988,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989408.659, "dur": 0.217, + "args": { + "External id": 292989,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989409.680, "dur": 0.223, + "args": { + "External id": 292990,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989411.197, "dur": 0.246, + "args": { + "External id": 292991,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989412.363, "dur": 0.230, + "args": { + "External id": 292992,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989413.362, "dur": 0.216, + "args": { + "External id": 292993,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989414.344, "dur": 0.220, + "args": { + "External id": 292994,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989415.325, "dur": 0.215, + "args": { + "External id": 292995,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989416.318, "dur": 0.227, + "args": { + "External id": 292996,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989419.285, "dur": 0.279, + "args": { + "External id": 292997,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989420.393, "dur": 0.251, + "args": { + "External id": 292998,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989421.616, "dur": 0.351, + "args": { + "External id": 292999,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989422.828, "dur": 0.283, + "args": { + "External id": 293000,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989424.232, "dur": 0.218, + "args": { + "External id": 293001,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989425.369, "dur": 0.334, + "args": { + "External id": 293002,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989426.689, "dur": 0.214, + "args": { + "External id": 293003,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989427.857, "dur": 0.357, + "args": { + "External id": 293004,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989430.469, "dur": 0.279, + "args": { + "External id": 293005,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989431.494, "dur": 0.330, + "args": { + "External id": 293006,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989432.756, "dur": 0.319, + "args": { + "External id": 293007,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989434.012, "dur": 0.223, + "args": { + "External id": 293008,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989435.035, "dur": 0.224, + "args": { + "External id": 293009,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989436.075, "dur": 0.234, + "args": { + "External id": 293010,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989437.111, "dur": 0.220, + "args": { + "External id": 293011,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989438.092, "dur": 0.226, + "args": { + "External id": 293012,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989441.080, "dur": 0.220, + "args": { + "External id": 293013,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989442.042, "dur": 0.220, + "args": { + "External id": 293014,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989443.022, "dur": 0.218, + "args": { + "External id": 293015,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989443.988, "dur": 0.221, + "args": { + "External id": 293016,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989444.967, "dur": 0.218, + "args": { + "External id": 293017,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989445.999, "dur": 0.545, + "args": { + "External id": 293018,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989447.508, "dur": 0.430, + "args": { + "External id": 293019,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989448.715, "dur": 0.440, + "args": { + "External id": 293020,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989451.864, "dur": 0.429, + "args": { + "External id": 293021,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989453.072, "dur": 0.221, + "args": { + "External id": 293022,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989454.068, "dur": 0.220, + "args": { + "External id": 293023,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989455.090, "dur": 0.219, + "args": { + "External id": 293024,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989456.084, "dur": 0.214, + "args": { + "External id": 293025,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989457.047, "dur": 0.231, + "args": { + "External id": 293026,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989458.132, "dur": 0.216, + "args": { + "External id": 293027,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989459.091, "dur": 0.404, + "args": { + "External id": 293028,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989461.948, "dur": 0.232, + "args": { + "External id": 293029,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989462.925, "dur": 0.235, + "args": { + "External id": 293030,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989464.087, "dur": 0.357, + "args": { + "External id": 293031,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989465.238, "dur": 0.233, + "args": { + "External id": 293032,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989466.232, "dur": 0.229, + "args": { + "External id": 293033,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989467.217, "dur": 0.350, + "args": { + "External id": 293034,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989468.332, "dur": 0.215, + "args": { + "External id": 293035,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989469.292, "dur": 0.219, + "args": { + "External id": 293036,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989471.788, "dur": 0.218, + "args": { + "External id": 293037,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989472.842, "dur": 0.251, + "args": { + "External id": 293038,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989473.879, "dur": 0.216, + "args": { + "External id": 293039,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989474.928, "dur": 0.243, + "args": { + "External id": 293040,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989475.926, "dur": 0.225, + "args": { + "External id": 293041,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989476.898, "dur": 0.221, + "args": { + "External id": 293042,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989479.092, "dur": 0.237, + "args": { + "External id": 293043,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989480.272, "dur": 0.259, + "args": { + "External id": 293044,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989482.831, "dur": 0.514, + "args": { + "External id": 293045,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989484.328, "dur": 0.243, + "args": { + "External id": 293046,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989485.693, "dur": 0.210, + "args": { + "External id": 293047,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989486.839, "dur": 0.279, + "args": { + "External id": 293048,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989487.878, "dur": 0.428, + "args": { + "External id": 293049,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989489.130, "dur": 0.435, + "args": { + "External id": 293050,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989495.148, "dur": 0.470, + "args": { + "External id": 293051,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989496.419, "dur": 0.396, + "args": { + "External id": 293052,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989499.331, "dur": 0.227, + "args": { + "External id": 293053,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989500.449, "dur": 0.219, + "args": { + "External id": 293054,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989501.467, "dur": 0.224, + "args": { + "External id": 293055,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989502.470, "dur": 0.235, + "args": { + "External id": 293056,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989503.508, "dur": 0.249, + "args": { + "External id": 293057,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989504.812, "dur": 0.214, + "args": { + "External id": 293058,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989506.746, "dur": 0.215, + "args": { + "External id": 293059,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989507.751, "dur": 0.209, + "args": { + "External id": 293060,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989510.530, "dur": 0.234, + "args": { + "External id": 293061,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989511.577, "dur": 0.225, + "args": { + "External id": 293062,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989512.535, "dur": 0.217, + "args": { + "External id": 293063,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989513.502, "dur": 0.215, + "args": { + "External id": 293064,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989514.463, "dur": 0.214, + "args": { + "External id": 293065,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989515.497, "dur": 0.212, + "args": { + "External id": 293066,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989516.437, "dur": 0.215, + "args": { + "External id": 293067,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989517.465, "dur": 0.347, + "args": { + "External id": 293068,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989520.116, "dur": 0.231, + "args": { + "External id": 293069,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989521.268, "dur": 0.355, + "args": { + "External id": 293070,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989522.623, "dur": 0.245, + "args": { + "External id": 293071,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989523.794, "dur": 0.350, + "args": { + "External id": 293072,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989525.098, "dur": 0.305, + "args": { + "External id": 293073,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989526.156, "dur": 0.208, + "args": { + "External id": 293074,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989527.571, "dur": 0.221, + "args": { + "External id": 293075,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989529.817, "dur": 0.212, + "args": { + "External id": 293076,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989532.798, "dur": 0.218, + "args": { + "External id": 293077,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989534.006, "dur": 0.231, + "args": { + "External id": 293078,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989535.021, "dur": 0.216, + "args": { + "External id": 293079,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989536.002, "dur": 0.216, + "args": { + "External id": 293080,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989536.944, "dur": 0.214, + "args": { + "External id": 293081,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989537.909, "dur": 0.212, + "args": { + "External id": 293082,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989538.862, "dur": 0.219, + "args": { + "External id": 293083,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989539.832, "dur": 0.211, + "args": { + "External id": 293084,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989542.535, "dur": 0.216, + "args": { + "External id": 293085,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989543.555, "dur": 0.205, + "args": { + "External id": 293086,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989544.555, "dur": 0.213, + "args": { + "External id": 293087,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989545.675, "dur": 0.213, + "args": { + "External id": 293088,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989546.766, "dur": 0.440, + "args": { + "External id": 293089,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989548.146, "dur": 0.222, + "args": { + "External id": 293090,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989549.088, "dur": 0.214, + "args": { + "External id": 293091,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989550.030, "dur": 0.209, + "args": { + "External id": 293092,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989552.739, "dur": 0.215, + "args": { + "External id": 293093,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989553.689, "dur": 0.212, + "args": { + "External id": 293094,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989555.042, "dur": 0.361, + "args": { + "External id": 293095,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989556.267, "dur": 0.360, + "args": { + "External id": 293096,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989557.377, "dur": 0.357, + "args": { + "External id": 293097,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989558.466, "dur": 0.377, + "args": { + "External id": 293098,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989559.615, "dur": 0.212, + "args": { + "External id": 293099,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989560.575, "dur": 0.322, + "args": { + "External id": 293100,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989563.536, "dur": 0.382, + "args": { + "External id": 293101,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989564.837, "dur": 0.343, + "args": { + "External id": 293102,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989565.901, "dur": 0.330, + "args": { + "External id": 293103,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989567.015, "dur": 0.288, + "args": { + "External id": 293104,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989568.039, "dur": 0.358, + "args": { + "External id": 293105,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989569.127, "dur": 0.351, + "args": { + "External id": 293106,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989570.201, "dur": 0.218, + "args": { + "External id": 293107,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989571.169, "dur": 0.211, + "args": { + "External id": 293108,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989574.019, "dur": 0.246, + "args": { + "External id": 293109,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989575.019, "dur": 0.208, + "args": { + "External id": 293110,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989575.939, "dur": 0.214, + "args": { + "External id": 293111,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989576.920, "dur": 0.212, + "args": { + "External id": 293112,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989577.848, "dur": 0.251, + "args": { + "External id": 293113,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989578.825, "dur": 0.210, + "args": { + "External id": 293114,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989580.162, "dur": 0.218, + "args": { + "External id": 293115,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989581.216, "dur": 0.234, + "args": { + "External id": 293116,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989583.601, "dur": 0.225, + "args": { + "External id": 293117,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989584.599, "dur": 0.209, + "args": { + "External id": 293118,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989585.542, "dur": 0.212, + "args": { + "External id": 293119,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989586.557, "dur": 0.210, + "args": { + "External id": 293120,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989587.510, "dur": 0.216, + "args": { + "External id": 293121,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989588.542, "dur": 0.208, + "args": { + "External id": 293122,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989589.489, "dur": 0.216, + "args": { + "External id": 293123,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989590.446, "dur": 0.209, + "args": { + "External id": 293124,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989593.382, "dur": 0.230, + "args": { + "External id": 293125,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989594.340, "dur": 0.215, + "args": { + "External id": 293126,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989595.277, "dur": 0.214, + "args": { + "External id": 293127,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989596.401, "dur": 0.213, + "args": { + "External id": 293128,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989597.576, "dur": 0.215, + "args": { + "External id": 293129,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989598.661, "dur": 0.210, + "args": { + "External id": 293130,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989599.619, "dur": 0.215, + "args": { + "External id": 293131,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989600.565, "dur": 0.212, + "args": { + "External id": 293132,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989603.282, "dur": 0.213, + "args": { + "External id": 293133,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989604.410, "dur": 0.211, + "args": { + "External id": 293134,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989605.341, "dur": 0.222, + "args": { + "External id": 293135,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989606.297, "dur": 0.210, + "args": { + "External id": 293136,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989607.510, "dur": 0.215, + "args": { + "External id": 293137,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989608.497, "dur": 0.209, + "args": { + "External id": 293138,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989609.662, "dur": 0.231, + "args": { + "External id": 293139,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989610.727, "dur": 0.210, + "args": { + "External id": 293140,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989613.537, "dur": 0.220, + "args": { + "External id": 293141,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989614.494, "dur": 0.211, + "args": { + "External id": 293142,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989615.429, "dur": 0.221, + "args": { + "External id": 293143,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989616.399, "dur": 0.210, + "args": { + "External id": 293144,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989617.367, "dur": 0.218, + "args": { + "External id": 293145,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989618.321, "dur": 0.211, + "args": { + "External id": 293146,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989619.346, "dur": 0.211, + "args": { + "External id": 293147,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989645.681, "dur": 0.881, + "args": { + "External id": 293148,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989651.819, "dur": 0.239, + "args": { + "External id": 293149,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989652.986, "dur": 0.219, + "args": { + "External id": 293150,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989654.072, "dur": 0.213, + "args": { + "External id": 293151,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989655.319, "dur": 0.240, + "args": { + "External id": 293152,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989656.295, "dur": 0.207, + "args": { + "External id": 293153,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989657.259, "dur": 0.214, + "args": { + "External id": 293154,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989658.217, "dur": 0.210, + "args": { + "External id": 293155,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989659.196, "dur": 0.211, + "args": { + "External id": 293156,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989661.824, "dur": 0.214, + "args": { + "External id": 293157,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989662.774, "dur": 0.338, + "args": { + "External id": 293158,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989663.830, "dur": 0.332, + "args": { + "External id": 293159,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989664.909, "dur": 0.353, + "args": { + "External id": 293160,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989665.984, "dur": 0.355, + "args": { + "External id": 293161,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989667.228, "dur": 0.331, + "args": { + "External id": 293162,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989668.282, "dur": 0.240, + "args": { + "External id": 293163,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989669.478, "dur": 0.328, + "args": { + "External id": 293164,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989672.072, "dur": 0.206, + "args": { + "External id": 293165,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989673.325, "dur": 0.310, + "args": { + "External id": 293166,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989674.873, "dur": 0.205, + "args": { + "External id": 293167,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989675.888, "dur": 0.211, + "args": { + "External id": 293168,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989676.921, "dur": 0.224, + "args": { + "External id": 293169,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989677.889, "dur": 0.209, + "args": { + "External id": 293170,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989678.856, "dur": 0.208, + "args": { + "External id": 293171,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989679.901, "dur": 0.208, + "args": { + "External id": 293172,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989682.467, "dur": 0.242, + "args": { + "External id": 293173,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989683.469, "dur": 0.410, + "args": { + "External id": 293174,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989684.612, "dur": 0.308, + "args": { + "External id": 293175,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989685.821, "dur": 0.251, + "args": { + "External id": 293176,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989686.792, "dur": 0.364, + "args": { + "External id": 293177,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989687.870, "dur": 0.354, + "args": { + "External id": 293178,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989689.185, "dur": 0.201, + "args": { + "External id": 293179,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989690.127, "dur": 0.303, + "args": { + "External id": 293180,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989692.662, "dur": 0.336, + "args": { + "External id": 293181,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989693.946, "dur": 0.442, + "args": { + "External id": 293182,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989695.140, "dur": 0.205, + "args": { + "External id": 293183,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989696.093, "dur": 0.445, + "args": { + "External id": 293184,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989697.268, "dur": 0.210, + "args": { + "External id": 293185,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989698.254, "dur": 0.206, + "args": { + "External id": 293186,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989699.215, "dur": 0.205, + "args": { + "External id": 293187,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8719 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989700.142, "dur": 0.229, + "args": { + "External id": 293188,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8720 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989704.463, "dur": 0.260, + "args": { + "External id": 293189,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8721 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989705.584, "dur": 0.211, + "args": { + "External id": 293190,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8722 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989706.554, "dur": 0.204, + "args": { + "External id": 293191,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8723 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989707.482, "dur": 0.470, + "args": { + "External id": 293192,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8724 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989708.673, "dur": 0.226, + "args": { + "External id": 293193,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8725 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989709.717, "dur": 0.402, + "args": { + "External id": 293194,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8726 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989710.880, "dur": 0.415, + "args": { + "External id": 293195,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8727 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989712.044, "dur": 0.500, + "args": { + "External id": 293196,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8728 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989715.188, "dur": 0.472, + "args": { + "External id": 293197,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8729 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989716.412, "dur": 0.413, + "args": { + "External id": 293198,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8730 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989718.076, "dur": 0.203, + "args": { + "External id": 293199,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8731 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989719.050, "dur": 0.206, + "args": { + "External id": 293200,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8732 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989720.024, "dur": 0.238, + "args": { + "External id": 293201,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8733 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989721.044, "dur": 0.206, + "args": { + "External id": 293202,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8734 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989721.964, "dur": 0.201, + "args": { + "External id": 293203,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8735 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989722.890, "dur": 0.206, + "args": { + "External id": 293204,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8736 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989725.705, "dur": 0.202, + "args": { + "External id": 293205,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8737 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989726.610, "dur": 0.219, + "args": { + "External id": 293206,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8738 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989727.544, "dur": 0.202, + "args": { + "External id": 293207,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8739 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989728.480, "dur": 0.209, + "args": { + "External id": 293208,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8740 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989729.524, "dur": 0.205, + "args": { + "External id": 293209,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8741 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989730.446, "dur": 0.247, + "args": { + "External id": 293210,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8742 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989731.414, "dur": 0.223, + "args": { + "External id": 293211,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8743 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989732.360, "dur": 0.253, + "args": { + "External id": 293212,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8744 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989734.922, "dur": 0.207, + "args": { + "External id": 293213,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8745 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989735.876, "dur": 0.210, + "args": { + "External id": 293214,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8746 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989736.824, "dur": 0.238, + "args": { + "External id": 293215,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8747 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989738.120, "dur": 0.206, + "args": { + "External id": 293216,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8748 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989739.235, "dur": 0.209, + "args": { + "External id": 293217,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8749 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989740.296, "dur": 0.207, + "args": { + "External id": 293218,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8750 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989741.266, "dur": 0.203, + "args": { + "External id": 293219,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8751 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989742.248, "dur": 0.209, + "args": { + "External id": 293220,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8752 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989744.695, "dur": 0.208, + "args": { + "External id": 293221,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8753 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989745.636, "dur": 0.211, + "args": { + "External id": 293222,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8754 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989746.562, "dur": 0.348, + "args": { + "External id": 293223,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8755 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989747.643, "dur": 0.377, + "args": { + "External id": 293224,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8756 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989748.943, "dur": 0.438, + "args": { + "External id": 293225,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8757 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989750.088, "dur": 0.374, + "args": { + "External id": 293226,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8758 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989751.181, "dur": 0.240, + "args": { + "External id": 293227,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8759 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989752.188, "dur": 0.349, + "args": { + "External id": 293228,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8760 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989754.710, "dur": 0.307, + "args": { + "External id": 293229,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8761 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989755.724, "dur": 0.440, + "args": { + "External id": 293230,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8762 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989756.880, "dur": 0.361, + "args": { + "External id": 293231,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8763 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989757.981, "dur": 0.310, + "args": { + "External id": 293232,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8764 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989759.040, "dur": 0.342, + "args": { + "External id": 293233,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8765 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989760.160, "dur": 0.210, + "args": { + "External id": 293234,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8766 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989761.637, "dur": 0.223, + "args": { + "External id": 293235,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8767 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989762.581, "dur": 0.206, + "args": { + "External id": 293236,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8768 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989765.327, "dur": 0.207, + "args": { + "External id": 293237,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8769 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989766.290, "dur": 0.210, + "args": { + "External id": 293238,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8770 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989767.336, "dur": 0.205, + "args": { + "External id": 293239,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8771 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989768.276, "dur": 0.214, + "args": { + "External id": 293240,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8772 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989769.259, "dur": 0.206, + "args": { + "External id": 293241,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8773 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989770.196, "dur": 0.212, + "args": { + "External id": 293242,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8774 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989771.121, "dur": 0.206, + "args": { + "External id": 293243,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8775 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989772.026, "dur": 0.223, + "args": { + "External id": 293244,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8776 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989774.856, "dur": 0.210, + "args": { + "External id": 293245,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8777 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989775.791, "dur": 0.214, + "args": { + "External id": 293246,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8778 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989776.801, "dur": 0.434, + "args": { + "External id": 293247,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8779 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989777.947, "dur": 0.348, + "args": { + "External id": 293248,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8780 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989779.008, "dur": 0.360, + "args": { + "External id": 293249,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8781 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989780.093, "dur": 0.363, + "args": { + "External id": 293250,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8782 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989781.362, "dur": 0.203, + "args": { + "External id": 293251,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8783 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989782.352, "dur": 0.310, + "args": { + "External id": 293252,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8784 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989785.129, "dur": 0.395, + "args": { + "External id": 293253,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8785 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989786.241, "dur": 0.342, + "args": { + "External id": 293254,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8786 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989787.499, "dur": 0.319, + "args": { + "External id": 293255,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8787 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989788.720, "dur": 0.219, + "args": { + "External id": 293256,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8788 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368989790.086, "dur": 0.207, + "args": { + "External id": 293257,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 8789 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::stack", "pid": 2070552, "tid": 2070552, + "ts": 5333368989847.174, "dur": 1543.467, + "args": { + "External id": 293258,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8790 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::stack", "pid": 2070552, "tid": 2070552, + "ts": 5333368990290.454, "dur": 1024.030, + "args": { + "External id": 293259,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8791 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990299.873, "dur": 7.975, + "args": { + "External id": 293260,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8792 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990304.105, "dur": 3.243, + "args": { + "External id": 293261,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8793 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990308.744, "dur": 3.308, + "args": { + "External id": 293262,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8794 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990310.085, "dur": 1.816, + "args": { + "External id": 293263,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8795 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990312.892, "dur": 2.967, + "args": { + "External id": 293264,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8796 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990314.916, "dur": 0.860, + "args": { + "External id": 293265,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8797 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990318.335, "dur": 1.935, + "args": { + "External id": 293266,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8798 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990319.166, "dur": 0.942, + "args": { + "External id": 293267,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8799 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990320.738, "dur": 4.259, + "args": { + "External id": 293268,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8800 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990323.944, "dur": 0.982, + "args": { + "External id": 293269,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8801 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990325.348, "dur": 1.665, + "args": { + "External id": 293270,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8802 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990326.049, "dur": 0.794, + "args": { + "External id": 293271,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8803 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990327.282, "dur": 1.566, + "args": { + "External id": 293272,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8804 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990327.897, "dur": 0.886, + "args": { + "External id": 293273,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8805 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990329.077, "dur": 3.443, + "args": { + "External id": 293274,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8806 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990331.789, "dur": 0.666, + "args": { + "External id": 293275,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8807 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990332.749, "dur": 1.674, + "args": { + "External id": 293276,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8808 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990333.376, "dur": 0.978, + "args": { + "External id": 293277,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8809 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990334.661, "dur": 2.116, + "args": { + "External id": 293278,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8810 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990335.267, "dur": 1.312, + "args": { + "External id": 293279,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8811 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990337.044, "dur": 2.667, + "args": { + "External id": 293280,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8812 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990338.900, "dur": 0.741, + "args": { + "External id": 293281,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8813 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990341.775, "dur": 1.626, + "args": { + "External id": 293282,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8814 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990342.476, "dur": 0.860, + "args": { + "External id": 293283,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8815 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990343.629, "dur": 3.624, + "args": { + "External id": 293284,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8816 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990346.328, "dur": 0.850, + "args": { + "External id": 293285,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8817 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990347.704, "dur": 1.632, + "args": { + "External id": 293286,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8818 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990348.707, "dur": 0.559, + "args": { + "External id": 293287,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8819 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990349.745, "dur": 2.055, + "args": { + "External id": 293288,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8820 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990350.568, "dur": 0.977, + "args": { + "External id": 293289,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8821 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990352.026, "dur": 3.071, + "args": { + "External id": 293290,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8822 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990354.315, "dur": 0.626, + "args": { + "External id": 293291,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8823 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990355.320, "dur": 1.230, + "args": { + "External id": 293292,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8824 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990355.712, "dur": 0.775, + "args": { + "External id": 293293,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8825 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990356.781, "dur": 1.971, + "args": { + "External id": 293294,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8826 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990357.362, "dur": 1.264, + "args": { + "External id": 293295,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8827 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990358.981, "dur": 2.422, + "args": { + "External id": 293296,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8828 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990360.600, "dur": 0.736, + "args": { + "External id": 293297,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8829 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990363.159, "dur": 1.431, + "args": { + "External id": 293298,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8830 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990363.559, "dur": 0.877, + "args": { + "External id": 293299,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8831 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990365.134, "dur": 4.566, + "args": { + "External id": 293300,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8832 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990368.818, "dur": 0.802, + "args": { + "External id": 293301,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8833 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990370.121, "dur": 1.557, + "args": { + "External id": 293302,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8834 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990370.983, "dur": 0.629, + "args": { + "External id": 293303,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8835 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990372.087, "dur": 1.473, + "args": { + "External id": 293304,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8836 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990372.454, "dur": 1.040, + "args": { + "External id": 293305,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8837 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990373.787, "dur": 3.429, + "args": { + "External id": 293306,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8838 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990376.396, "dur": 0.754, + "args": { + "External id": 293307,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8839 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990377.443, "dur": 1.408, + "args": { + "External id": 293308,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8840 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990378.026, "dur": 0.754, + "args": { + "External id": 293309,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8841 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990379.280, "dur": 2.226, + "args": { + "External id": 293310,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8842 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990379.913, "dur": 1.527, + "args": { + "External id": 293311,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8843 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990381.972, "dur": 2.542, + "args": { + "External id": 293312,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8844 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990383.746, "dur": 0.704, + "args": { + "External id": 293313,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8845 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990386.573, "dur": 1.233, + "args": { + "External id": 293314,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8846 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990386.967, "dur": 0.667, + "args": { + "External id": 293315,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8847 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990388.240, "dur": 3.548, + "args": { + "External id": 293316,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8848 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990390.986, "dur": 0.733, + "args": { + "External id": 293317,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8849 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990392.096, "dur": 1.366, + "args": { + "External id": 293318,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8850 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990392.683, "dur": 0.713, + "args": { + "External id": 293319,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8851 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990393.686, "dur": 1.484, + "args": { + "External id": 293320,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8852 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990394.286, "dur": 0.707, + "args": { + "External id": 293321,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8853 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990395.575, "dur": 2.525, + "args": { + "External id": 293322,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8854 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990397.432, "dur": 0.597, + "args": { + "External id": 293323,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8855 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990398.325, "dur": 0.870, + "args": { + "External id": 293324,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8856 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990398.700, "dur": 0.422, + "args": { + "External id": 293325,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8857 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990399.423, "dur": 2.295, + "args": { + "External id": 293326,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8858 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990399.796, "dur": 1.849, + "args": { + "External id": 293327,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8859 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990401.961, "dur": 1.293, + "args": { + "External id": 293328,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8860 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990402.672, "dur": 0.508, + "args": { + "External id": 293329,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8861 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990405.180, "dur": 1.173, + "args": { + "External id": 293330,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8862 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990405.602, "dur": 0.683, + "args": { + "External id": 293331,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8863 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990406.580, "dur": 3.711, + "args": { + "External id": 293332,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8864 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990409.466, "dur": 0.758, + "args": { + "External id": 293333,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8865 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990410.566, "dur": 2.217, + "args": { + "External id": 293334,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8866 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990411.967, "dur": 0.611, + "args": { + "External id": 293335,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8867 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990413.249, "dur": 1.642, + "args": { + "External id": 293336,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8868 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990414.171, "dur": 0.653, + "args": { + "External id": 293337,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8869 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990415.343, "dur": 2.684, + "args": { + "External id": 293338,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8870 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990417.336, "dur": 0.518, + "args": { + "External id": 293339,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8871 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990418.520, "dur": 2.030, + "args": { + "External id": 293340,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8872 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990419.591, "dur": 0.889, + "args": { + "External id": 293341,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8873 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990421.010, "dur": 3.258, + "args": { + "External id": 293342,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8874 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990422.557, "dur": 1.550, + "args": { + "External id": 293343,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8875 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990424.741, "dur": 1.825, + "args": { + "External id": 293344,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8876 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990425.973, "dur": 0.524, + "args": { + "External id": 293345,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8877 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990428.880, "dur": 2.143, + "args": { + "External id": 293346,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8878 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990429.970, "dur": 0.902, + "args": { + "External id": 293347,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8879 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990431.450, "dur": 3.844, + "args": { + "External id": 293348,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8880 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990434.327, "dur": 0.903, + "args": { + "External id": 293349,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8881 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990435.592, "dur": 1.987, + "args": { + "External id": 293350,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8882 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990436.755, "dur": 0.658, + "args": { + "External id": 293351,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8883 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990437.829, "dur": 1.733, + "args": { + "External id": 293352,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8884 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990438.877, "dur": 0.525, + "args": { + "External id": 293353,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8885 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990440.000, "dur": 3.089, + "args": { + "External id": 293354,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8886 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990442.531, "dur": 0.490, + "args": { + "External id": 293355,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8887 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990443.556, "dur": 2.145, + "args": { + "External id": 293356,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8888 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990445.050, "dur": 0.581, + "args": { + "External id": 293357,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8889 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990446.173, "dur": 2.736, + "args": { + "External id": 293358,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8890 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990447.253, "dur": 1.423, + "args": { + "External id": 293359,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8891 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990449.219, "dur": 1.784, + "args": { + "External id": 293360,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8892 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990450.338, "dur": 0.597, + "args": { + "External id": 293361,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8893 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990452.974, "dur": 1.394, + "args": { + "External id": 293362,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8894 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990453.744, "dur": 0.558, + "args": { + "External id": 293363,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8895 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990454.627, "dur": 3.194, + "args": { + "External id": 293364,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8896 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990457.277, "dur": 0.476, + "args": { + "External id": 293365,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8897 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990458.128, "dur": 1.705, + "args": { + "External id": 293366,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8898 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990458.996, "dur": 0.766, + "args": { + "External id": 293367,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8899 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990460.141, "dur": 1.975, + "args": { + "External id": 293368,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8900 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990461.295, "dur": 0.755, + "args": { + "External id": 293369,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8901 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990462.377, "dur": 3.177, + "args": { + "External id": 293370,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8902 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990464.638, "dur": 0.751, + "args": { + "External id": 293371,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8903 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990465.813, "dur": 2.556, + "args": { + "External id": 293372,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8904 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990467.405, "dur": 0.893, + "args": { + "External id": 293373,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8905 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990468.803, "dur": 2.775, + "args": { + "External id": 293374,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8906 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990469.747, "dur": 1.747, + "args": { + "External id": 293375,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8907 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990471.859, "dur": 25.787, + "args": { + "External id": 293376,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8908 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990497.076, "dur": 0.490, + "args": { + "External id": 298497,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8909 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990499.818, "dur": 2.319, + "args": { + "External id": 298498,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8910 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990500.559, "dur": 1.503, + "args": { + "External id": 298499,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8911 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990502.420, "dur": 1.578, + "args": { + "External id": 298500,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8912 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990503.511, "dur": 0.418, + "args": { + "External id": 298501,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8913 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990504.258, "dur": 1.388, + "args": { + "External id": 298502,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8914 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990505.104, "dur": 0.474, + "args": { + "External id": 298503,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8915 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990505.926, "dur": 3.085, + "args": { + "External id": 298504,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8916 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990508.529, "dur": 0.414, + "args": { + "External id": 298505,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8917 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990509.297, "dur": 1.121, + "args": { + "External id": 298506,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8918 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990510.006, "dur": 0.338, + "args": { + "External id": 298507,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8919 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990510.691, "dur": 5.653, + "args": { + "External id": 298508,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8920 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990515.218, "dur": 1.053, + "args": { + "External id": 298509,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8921 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990516.626, "dur": 1.407, + "args": { + "External id": 298510,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8922 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990517.365, "dur": 0.598, + "args": { + "External id": 298511,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8923 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990518.290, "dur": 2.654, + "args": { + "External id": 298512,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8924 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990519.997, "dur": 0.880, + "args": { + "External id": 298513,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8925 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990522.890, "dur": 2.660, + "args": { + "External id": 298514,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8926 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990524.911, "dur": 0.572, + "args": { + "External id": 298515,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8927 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990525.837, "dur": 2.109, + "args": { + "External id": 298516,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8928 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990527.259, "dur": 0.620, + "args": { + "External id": 298517,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8929 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990528.255, "dur": 1.738, + "args": { + "External id": 298518,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8930 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990529.271, "dur": 0.634, + "args": { + "External id": 298519,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8931 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990530.246, "dur": 3.284, + "args": { + "External id": 298520,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8932 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990532.693, "dur": 0.580, + "args": { + "External id": 298521,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8933 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990533.785, "dur": 1.431, + "args": { + "External id": 298522,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8934 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990534.571, "dur": 0.576, + "args": { + "External id": 298523,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8935 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990535.482, "dur": 2.687, + "args": { + "External id": 298524,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8936 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990536.807, "dur": 1.283, + "args": { + "External id": 298525,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8937 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990538.460, "dur": 1.367, + "args": { + "External id": 298526,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8938 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990539.273, "dur": 0.487, + "args": { + "External id": 298527,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8939 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990540.103, "dur": 1.926, + "args": { + "External id": 298528,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8940 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990541.283, "dur": 0.674, + "args": { + "External id": 298529,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8941 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990543.909, "dur": 2.875, + "args": { + "External id": 298530,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8942 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990546.135, "dur": 0.584, + "args": { + "External id": 298531,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8943 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990547.073, "dur": 1.884, + "args": { + "External id": 298532,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8944 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990548.120, "dur": 0.749, + "args": { + "External id": 298533,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8945 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990549.211, "dur": 1.863, + "args": { + "External id": 298534,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8946 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990550.315, "dur": 0.691, + "args": { + "External id": 298535,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8947 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990551.415, "dur": 2.736, + "args": { + "External id": 298536,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8948 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990553.739, "dur": 0.336, + "args": { + "External id": 298537,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8949 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990554.532, "dur": 1.226, + "args": { + "External id": 298538,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8950 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990555.218, "dur": 0.470, + "args": { + "External id": 298539,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8951 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990556.077, "dur": 3.152, + "args": { + "External id": 298540,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8952 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990557.261, "dur": 1.896, + "args": { + "External id": 298541,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8953 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990559.512, "dur": 1.660, + "args": { + "External id": 298542,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8954 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990560.488, "dur": 0.610, + "args": { + "External id": 298543,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8955 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990561.627, "dur": 2.206, + "args": { + "External id": 298544,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8956 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990562.850, "dur": 0.920, + "args": { + "External id": 298545,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8957 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990565.929, "dur": 3.036, + "args": { + "External id": 298546,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8958 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990568.083, "dur": 0.801, + "args": { + "External id": 298547,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8959 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990569.242, "dur": 1.366, + "args": { + "External id": 298548,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8960 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990570.003, "dur": 0.536, + "args": { + "External id": 298549,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8961 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990570.866, "dur": 1.794, + "args": { + "External id": 298550,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8962 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990571.956, "dur": 0.634, + "args": { + "External id": 298551,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8963 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990572.948, "dur": 3.086, + "args": { + "External id": 298552,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8964 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990575.287, "dur": 0.674, + "args": { + "External id": 298553,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8965 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990576.302, "dur": 2.189, + "args": { + "External id": 298554,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8966 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990577.393, "dur": 1.021, + "args": { + "External id": 298555,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8967 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990578.807, "dur": 2.781, + "args": { + "External id": 298556,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8968 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990579.891, "dur": 1.626, + "args": { + "External id": 298557,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8969 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990581.886, "dur": 1.600, + "args": { + "External id": 298558,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8970 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990582.903, "dur": 0.514, + "args": { + "External id": 298559,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8971 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990584.087, "dur": 1.996, + "args": { + "External id": 298560,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8972 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990585.308, "dur": 0.710, + "args": { + "External id": 298561,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8973 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990587.853, "dur": 2.994, + "args": { + "External id": 298562,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8974 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990590.050, "dur": 0.736, + "args": { + "External id": 298563,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8975 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990591.122, "dur": 1.500, + "args": { + "External id": 298564,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8976 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990592.138, "dur": 0.407, + "args": { + "External id": 298565,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8977 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990592.905, "dur": 1.581, + "args": { + "External id": 298566,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8978 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990593.799, "dur": 0.611, + "args": { + "External id": 298567,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8979 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990594.741, "dur": 2.849, + "args": { + "External id": 298568,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8980 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990597.092, "dur": 0.425, + "args": { + "External id": 298569,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8981 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990597.846, "dur": 1.519, + "args": { + "External id": 298570,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8982 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990598.718, "dur": 0.570, + "args": { + "External id": 298571,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8983 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990599.623, "dur": 3.233, + "args": { + "External id": 298572,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8984 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990600.939, "dur": 1.848, + "args": { + "External id": 298573,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8985 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990603.108, "dur": 1.444, + "args": { + "External id": 298574,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8986 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990603.898, "dur": 0.583, + "args": { + "External id": 298575,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8987 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990604.856, "dur": 1.732, + "args": { + "External id": 298576,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8988 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990605.866, "dur": 0.654, + "args": { + "External id": 298577,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8989 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990608.562, "dur": 3.156, + "args": { + "External id": 298578,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8990 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990611.213, "dur": 0.441, + "args": { + "External id": 298579,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8991 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990612.034, "dur": 1.940, + "args": { + "External id": 298580,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8992 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990613.204, "dur": 0.701, + "args": { + "External id": 298581,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8993 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990614.226, "dur": 1.670, + "args": { + "External id": 298582,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8994 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990615.118, "dur": 0.715, + "args": { + "External id": 298583,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8995 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990616.149, "dur": 3.565, + "args": { + "External id": 298584,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8996 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990618.915, "dur": 0.729, + "args": { + "External id": 298585,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8997 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990619.978, "dur": 28.910, + "args": { + "External id": 298586,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 8998 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990647.986, "dur": 0.559, + "args": { + "External id": 298587,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 8999 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990649.362, "dur": 3.437, + "args": { + "External id": 298588,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9000 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990650.613, "dur": 1.964, + "args": { + "External id": 298589,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9001 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990654.109, "dur": 1.466, + "args": { + "External id": 298590,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9002 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990655.031, "dur": 0.464, + "args": { + "External id": 298591,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9003 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990655.889, "dur": 2.831, + "args": { + "External id": 298592,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9004 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990657.055, "dur": 1.586, + "args": { + "External id": 298593,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9005 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990660.704, "dur": 1.377, + "args": { + "External id": 298594,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9006 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990661.401, "dur": 0.612, + "args": { + "External id": 298595,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9007 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990662.369, "dur": 1.750, + "args": { + "External id": 298596,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9008 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990663.265, "dur": 0.787, + "args": { + "External id": 298597,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9009 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990664.448, "dur": 3.091, + "args": { + "External id": 298598,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9010 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990666.846, "dur": 0.624, + "args": { + "External id": 298599,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9011 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990667.835, "dur": 1.752, + "args": { + "External id": 298600,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9012 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990668.883, "dur": 0.639, + "args": { + "External id": 298601,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9013 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990670.094, "dur": 1.637, + "args": { + "External id": 298602,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9014 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990670.985, "dur": 0.682, + "args": { + "External id": 298603,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9015 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990672.030, "dur": 3.239, + "args": { + "External id": 298604,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9016 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990674.597, "dur": 0.597, + "args": { + "External id": 298605,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9017 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990675.580, "dur": 1.144, + "args": { + "External id": 298606,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9018 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990676.179, "dur": 0.475, + "args": { + "External id": 298607,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9019 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990676.962, "dur": 3.529, + "args": { + "External id": 298608,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9020 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990678.083, "dur": 2.233, + "args": { + "External id": 298609,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9021 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990682.318, "dur": 1.689, + "args": { + "External id": 298610,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9022 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990683.208, "dur": 0.729, + "args": { + "External id": 298611,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9023 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990684.256, "dur": 1.986, + "args": { + "External id": 298612,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9024 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990685.279, "dur": 0.891, + "args": { + "External id": 298613,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9025 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990686.485, "dur": 3.048, + "args": { + "External id": 298614,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9026 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990688.945, "dur": 0.519, + "args": { + "External id": 298615,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9027 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990689.849, "dur": 1.782, + "args": { + "External id": 298616,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9028 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990690.890, "dur": 0.663, + "args": { + "External id": 298617,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9029 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990691.969, "dur": 1.469, + "args": { + "External id": 298618,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9030 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990692.699, "dur": 0.666, + "args": { + "External id": 298619,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9031 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990693.727, "dur": 2.199, + "args": { + "External id": 298620,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9032 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990695.468, "dur": 0.386, + "args": { + "External id": 298621,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9033 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990696.370, "dur": 1.569, + "args": { + "External id": 298622,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9034 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990697.259, "dur": 0.601, + "args": { + "External id": 298623,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9035 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990698.219, "dur": 3.142, + "args": { + "External id": 298624,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9036 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990699.474, "dur": 1.811, + "args": { + "External id": 298625,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9037 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990703.525, "dur": 1.710, + "args": { + "External id": 298626,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9038 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990704.407, "dur": 0.757, + "args": { + "External id": 298627,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9039 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990705.612, "dur": 2.147, + "args": { + "External id": 298628,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9040 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990706.748, "dur": 0.940, + "args": { + "External id": 298629,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9041 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990708.003, "dur": 2.787, + "args": { + "External id": 298630,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9042 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990710.189, "dur": 0.530, + "args": { + "External id": 298631,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9043 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990711.100, "dur": 2.105, + "args": { + "External id": 298632,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9044 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990712.500, "dur": 0.637, + "args": { + "External id": 298633,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9045 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990713.447, "dur": 1.486, + "args": { + "External id": 298634,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9046 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990714.292, "dur": 0.577, + "args": { + "External id": 298635,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9047 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990715.210, "dur": 2.751, + "args": { + "External id": 298636,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9048 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990717.346, "dur": 0.536, + "args": { + "External id": 298637,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9049 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990718.199, "dur": 1.498, + "args": { + "External id": 298638,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9050 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990719.105, "dur": 0.526, + "args": { + "External id": 298639,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9051 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990720.011, "dur": 2.666, + "args": { + "External id": 298640,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9052 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990721.293, "dur": 1.309, + "args": { + "External id": 298641,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9053 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990724.830, "dur": 1.654, + "args": { + "External id": 298642,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9054 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990725.679, "dur": 0.736, + "args": { + "External id": 298643,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9055 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990726.846, "dur": 2.027, + "args": { + "External id": 298644,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9056 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990727.867, "dur": 0.941, + "args": { + "External id": 298645,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9057 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990729.115, "dur": 3.486, + "args": { + "External id": 298646,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9058 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990731.601, "dur": 0.929, + "args": { + "External id": 298647,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9059 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990732.943, "dur": 1.646, + "args": { + "External id": 298648,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9060 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990733.833, "dur": 0.677, + "args": { + "External id": 298649,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9061 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990734.849, "dur": 1.822, + "args": { + "External id": 298650,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9062 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990735.762, "dur": 0.841, + "args": { + "External id": 298651,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9063 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990736.943, "dur": 3.193, + "args": { + "External id": 298652,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9064 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990739.528, "dur": 0.541, + "args": { + "External id": 298653,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9065 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990740.386, "dur": 1.648, + "args": { + "External id": 298654,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9066 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990741.132, "dur": 0.833, + "args": { + "External id": 298655,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9067 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990742.299, "dur": 2.808, + "args": { + "External id": 298656,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9068 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990743.617, "dur": 1.410, + "args": { + "External id": 298657,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9069 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990747.022, "dur": 1.325, + "args": { + "External id": 298658,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9070 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990747.789, "dur": 0.488, + "args": { + "External id": 298659,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9071 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990748.625, "dur": 2.210, + "args": { + "External id": 298660,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9072 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990750.064, "dur": 0.698, + "args": { + "External id": 298661,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9073 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990751.137, "dur": 3.273, + "args": { + "External id": 298662,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9074 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990753.825, "dur": 0.517, + "args": { + "External id": 298663,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9075 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990754.708, "dur": 1.954, + "args": { + "External id": 298664,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9076 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990756.019, "dur": 0.565, + "args": { + "External id": 298665,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9077 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990756.928, "dur": 1.587, + "args": { + "External id": 298666,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9078 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990757.822, "dur": 0.626, + "args": { + "External id": 298667,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9079 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990758.761, "dur": 2.719, + "args": { + "External id": 298668,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9080 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990760.795, "dur": 0.615, + "args": { + "External id": 298669,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9081 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990761.718, "dur": 1.397, + "args": { + "External id": 298670,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9082 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990762.475, "dur": 0.572, + "args": { + "External id": 298671,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9083 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990763.380, "dur": 2.752, + "args": { + "External id": 298672,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9084 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990764.390, "dur": 1.670, + "args": { + "External id": 298673,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9085 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990767.984, "dur": 1.160, + "args": { + "External id": 298674,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9086 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990768.660, "dur": 0.417, + "args": { + "External id": 298675,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9087 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990769.509, "dur": 1.590, + "args": { + "External id": 298676,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9088 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990770.449, "dur": 0.586, + "args": { + "External id": 298677,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9089 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990771.337, "dur": 2.833, + "args": { + "External id": 298678,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9090 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990773.691, "dur": 0.410, + "args": { + "External id": 298679,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9091 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990774.527, "dur": 1.601, + "args": { + "External id": 298680,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9092 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990775.597, "dur": 0.453, + "args": { + "External id": 298681,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9093 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990776.415, "dur": 1.593, + "args": { + "External id": 298682,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9094 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990777.252, "dur": 0.690, + "args": { + "External id": 298683,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9095 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990778.248, "dur": 2.595, + "args": { + "External id": 298684,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9096 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990780.267, "dur": 0.500, + "args": { + "External id": 298685,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9097 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990781.364, "dur": 1.489, + "args": { + "External id": 298686,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9098 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990782.155, "dur": 0.624, + "args": { + "External id": 298687,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9099 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990783.117, "dur": 3.278, + "args": { + "External id": 298688,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9100 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990784.368, "dur": 1.878, + "args": { + "External id": 298689,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9101 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990788.253, "dur": 1.495, + "args": { + "External id": 298690,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9102 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990789.149, "dur": 0.526, + "args": { + "External id": 298691,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9103 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990790.038, "dur": 2.441, + "args": { + "External id": 298692,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9104 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990791.377, "dur": 1.040, + "args": { + "External id": 298693,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9105 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990792.738, "dur": 2.769, + "args": { + "External id": 298694,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9106 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990794.953, "dur": 0.486, + "args": { + "External id": 298695,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9107 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990795.805, "dur": 1.770, + "args": { + "External id": 298696,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9108 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990796.731, "dur": 0.775, + "args": { + "External id": 298697,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9109 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990797.851, "dur": 1.617, + "args": { + "External id": 298698,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9110 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990798.737, "dur": 0.666, + "args": { + "External id": 298699,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9111 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990799.707, "dur": 2.964, + "args": { + "External id": 298700,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9112 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990802.087, "dur": 0.517, + "args": { + "External id": 298701,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9113 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990803.006, "dur": 1.191, + "args": { + "External id": 298702,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9114 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990803.672, "dur": 0.455, + "args": { + "External id": 298703,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9115 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990804.459, "dur": 2.708, + "args": { + "External id": 298704,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9116 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990805.387, "dur": 1.713, + "args": { + "External id": 298705,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9117 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990809.103, "dur": 1.535, + "args": { + "External id": 298706,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9118 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990809.965, "dur": 0.604, + "args": { + "External id": 298707,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9119 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990810.995, "dur": 1.701, + "args": { + "External id": 298708,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9120 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990811.871, "dur": 0.763, + "args": { + "External id": 298709,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9121 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990812.951, "dur": 3.215, + "args": { + "External id": 298710,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9122 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990815.467, "dur": 0.621, + "args": { + "External id": 298711,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9123 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990816.426, "dur": 1.706, + "args": { + "External id": 298712,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9124 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990817.530, "dur": 0.529, + "args": { + "External id": 298713,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9125 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990818.367, "dur": 1.832, + "args": { + "External id": 298714,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9126 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990819.551, "dur": 0.583, + "args": { + "External id": 298715,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9127 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990820.438, "dur": 3.053, + "args": { + "External id": 298716,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9128 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990822.735, "dur": 0.685, + "args": { + "External id": 298717,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9129 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990823.777, "dur": 1.520, + "args": { + "External id": 298718,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9130 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990824.629, "dur": 0.599, + "args": { + "External id": 298719,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9131 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990825.570, "dur": 3.012, + "args": { + "External id": 298720,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9132 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990826.717, "dur": 1.798, + "args": { + "External id": 298721,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9133 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990830.515, "dur": 1.553, + "args": { + "External id": 298722,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9134 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990831.408, "dur": 0.589, + "args": { + "External id": 298723,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9135 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990832.506, "dur": 1.881, + "args": { + "External id": 298724,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9136 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990833.638, "dur": 0.687, + "args": { + "External id": 298725,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9137 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990834.690, "dur": 2.909, + "args": { + "External id": 298726,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9138 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990837.034, "dur": 0.495, + "args": { + "External id": 298727,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9139 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990837.869, "dur": 1.961, + "args": { + "External id": 298728,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9140 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990839.310, "dur": 0.449, + "args": { + "External id": 298729,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9141 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990840.074, "dur": 1.558, + "args": { + "External id": 298730,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9142 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990841.005, "dur": 0.564, + "args": { + "External id": 298731,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9143 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990841.896, "dur": 3.339, + "args": { + "External id": 298732,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9144 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990844.566, "dur": 0.601, + "args": { + "External id": 298733,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9145 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990845.595, "dur": 1.213, + "args": { + "External id": 298734,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9146 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990846.375, "dur": 0.367, + "args": { + "External id": 298735,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9147 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990847.153, "dur": 2.896, + "args": { + "External id": 298736,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9148 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990848.409, "dur": 1.572, + "args": { + "External id": 298737,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9149 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990852.270, "dur": 1.491, + "args": { + "External id": 298738,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9150 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990853.384, "dur": 0.307, + "args": { + "External id": 298739,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9151 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990853.997, "dur": 2.356, + "args": { + "External id": 298740,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9152 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990855.636, "dur": 0.646, + "args": { + "External id": 298741,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9153 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990856.635, "dur": 3.151, + "args": { + "External id": 298742,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9154 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990859.083, "dur": 0.633, + "args": { + "External id": 298743,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9155 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990860.056, "dur": 1.777, + "args": { + "External id": 298744,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9156 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990861.050, "dur": 0.710, + "args": { + "External id": 298745,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9157 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990862.341, "dur": 1.442, + "args": { + "External id": 298746,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9158 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990863.150, "dur": 0.570, + "args": { + "External id": 298747,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9159 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990864.026, "dur": 2.977, + "args": { + "External id": 298748,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9160 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990866.148, "dur": 0.784, + "args": { + "External id": 298749,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9161 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990867.280, "dur": 1.554, + "args": { + "External id": 298750,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9162 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990868.188, "dur": 0.575, + "args": { + "External id": 298751,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9163 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990869.121, "dur": 1.913, + "args": { + "External id": 298752,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9164 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990869.831, "dur": 1.123, + "args": { + "External id": 298753,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9165 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990873.067, "dur": 1.635, + "args": { + "External id": 298754,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9166 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990873.930, "dur": 0.695, + "args": { + "External id": 298755,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9167 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990874.964, "dur": 1.605, + "args": { + "External id": 298756,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9168 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990875.629, "dur": 0.877, + "args": { + "External id": 298757,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9169 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990876.836, "dur": 3.518, + "args": { + "External id": 298758,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9170 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990879.500, "dur": 0.782, + "args": { + "External id": 298759,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9171 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990880.688, "dur": 1.499, + "args": { + "External id": 298760,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9172 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990881.523, "dur": 0.584, + "args": { + "External id": 298761,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9173 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990882.430, "dur": 1.489, + "args": { + "External id": 298762,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9174 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990883.284, "dur": 0.570, + "args": { + "External id": 298763,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9175 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990884.183, "dur": 2.412, + "args": { + "External id": 298764,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9176 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990885.936, "dur": 0.589, + "args": { + "External id": 298765,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9177 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990886.834, "dur": 1.653, + "args": { + "External id": 298766,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9178 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990887.857, "dur": 0.559, + "args": { + "External id": 298767,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9179 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990888.758, "dur": 2.321, + "args": { + "External id": 298768,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9180 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990889.480, "dur": 1.420, + "args": { + "External id": 298769,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9181 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990892.831, "dur": 1.702, + "args": { + "External id": 298770,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9182 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990893.709, "dur": 0.752, + "args": { + "External id": 298771,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9183 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990894.779, "dur": 1.881, + "args": { + "External id": 298772,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9184 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990895.914, "dur": 0.680, + "args": { + "External id": 298773,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9185 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990896.903, "dur": 2.646, + "args": { + "External id": 298774,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9186 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990898.672, "dur": 0.798, + "args": { + "External id": 298775,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9187 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990899.832, "dur": 1.641, + "args": { + "External id": 298776,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9188 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990900.894, "dur": 0.509, + "args": { + "External id": 298777,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9189 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990901.720, "dur": 1.411, + "args": { + "External id": 298778,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9190 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990902.299, "dur": 0.760, + "args": { + "External id": 298779,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9191 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990903.423, "dur": 2.282, + "args": { + "External id": 298780,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9192 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990904.999, "dur": 0.637, + "args": { + "External id": 298781,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9193 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990905.979, "dur": 2.155, + "args": { + "External id": 298782,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9194 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990907.083, "dur": 0.983, + "args": { + "External id": 298783,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9195 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990908.464, "dur": 2.441, + "args": { + "External id": 298784,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9196 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990909.291, "dur": 1.539, + "args": { + "External id": 298785,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9197 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990912.712, "dur": 1.312, + "args": { + "External id": 298786,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9198 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990913.299, "dur": 0.652, + "args": { + "External id": 298787,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9199 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990914.421, "dur": 1.455, + "args": { + "External id": 298788,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9200 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990915.101, "dur": 0.710, + "args": { + "External id": 298789,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9201 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990916.118, "dur": 2.962, + "args": { + "External id": 298790,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9202 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990918.409, "dur": 0.598, + "args": { + "External id": 298791,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9203 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990919.346, "dur": 1.390, + "args": { + "External id": 298792,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9204 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990919.998, "dur": 0.673, + "args": { + "External id": 298793,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9205 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990920.971, "dur": 1.747, + "args": { + "External id": 298794,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9206 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990921.948, "dur": 0.596, + "args": { + "External id": 298795,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9207 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990922.998, "dur": 3.176, + "args": { + "External id": 298796,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9208 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990925.331, "dur": 0.779, + "args": { + "External id": 298797,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9209 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990926.410, "dur": 1.627, + "args": { + "External id": 298798,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9210 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990927.436, "dur": 0.522, + "args": { + "External id": 298799,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9211 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990928.273, "dur": 2.874, + "args": { + "External id": 298800,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9212 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990929.445, "dur": 1.619, + "args": { + "External id": 298801,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9213 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990932.832, "dur": 1.329, + "args": { + "External id": 298802,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9214 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990933.468, "dur": 0.613, + "args": { + "External id": 298803,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9215 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990934.491, "dur": 1.917, + "args": { + "External id": 298804,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9216 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990935.467, "dur": 0.875, + "args": { + "External id": 298805,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9217 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990936.648, "dur": 3.213, + "args": { + "External id": 298806,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9218 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990939.057, "dur": 0.726, + "args": { + "External id": 298807,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9219 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990940.147, "dur": 1.875, + "args": { + "External id": 298808,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9220 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990941.167, "dur": 0.787, + "args": { + "External id": 298809,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9221 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990942.296, "dur": 1.464, + "args": { + "External id": 298810,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9222 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990943.006, "dur": 0.663, + "args": { + "External id": 298811,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9223 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990944.034, "dur": 3.129, + "args": { + "External id": 298812,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9224 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990946.304, "dur": 0.665, + "args": { + "External id": 298813,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9225 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990947.412, "dur": 1.609, + "args": { + "External id": 298814,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9226 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990948.288, "dur": 0.661, + "args": { + "External id": 298815,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9227 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990949.282, "dur": 2.804, + "args": { + "External id": 298816,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9228 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990949.915, "dur": 2.105, + "args": { + "External id": 298817,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9229 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990953.836, "dur": 1.704, + "args": { + "External id": 298818,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9230 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990954.772, "dur": 0.687, + "args": { + "External id": 298819,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9231 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990955.795, "dur": 1.744, + "args": { + "External id": 298820,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9232 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990956.536, "dur": 0.836, + "args": { + "External id": 298821,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9233 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990957.803, "dur": 2.799, + "args": { + "External id": 298822,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9234 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990959.931, "dur": 0.602, + "args": { + "External id": 298823,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9235 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990960.878, "dur": 1.358, + "args": { + "External id": 298824,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9236 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990961.514, "dur": 0.652, + "args": { + "External id": 298825,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9237 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990962.479, "dur": 1.755, + "args": { + "External id": 298826,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9238 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990963.382, "dur": 0.777, + "args": { + "External id": 298827,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9239 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990964.473, "dur": 2.621, + "args": { + "External id": 298828,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9240 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990966.507, "dur": 0.513, + "args": { + "External id": 298829,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9241 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990967.378, "dur": 1.238, + "args": { + "External id": 298830,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9242 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990968.122, "dur": 0.425, + "args": { + "External id": 298831,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9243 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990968.854, "dur": 2.310, + "args": { + "External id": 298832,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9244 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990969.520, "dur": 1.481, + "args": { + "External id": 298833,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9245 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990972.980, "dur": 1.502, + "args": { + "External id": 298834,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9246 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990973.887, "dur": 0.527, + "args": { + "External id": 298835,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9247 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990974.751, "dur": 1.641, + "args": { + "External id": 298836,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9248 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990975.485, "dur": 0.746, + "args": { + "External id": 298837,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9249 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990976.654, "dur": 3.353, + "args": { + "External id": 298838,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9250 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990979.165, "dur": 0.770, + "args": { + "External id": 298839,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9251 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990980.293, "dur": 1.636, + "args": { + "External id": 298840,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9252 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990981.050, "dur": 0.801, + "args": { + "External id": 298841,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9253 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990982.173, "dur": 1.752, + "args": { + "External id": 298842,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9254 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990983.031, "dur": 0.740, + "args": { + "External id": 298843,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9255 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990984.171, "dur": 2.802, + "args": { + "External id": 298844,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9256 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990986.334, "dur": 0.564, + "args": { + "External id": 298845,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9257 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990988.684, "dur": 1.622, + "args": { + "External id": 298846,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9258 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990989.549, "dur": 0.691, + "args": { + "External id": 298847,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9259 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990990.600, "dur": 2.314, + "args": { + "External id": 298848,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9260 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990991.546, "dur": 1.205, + "args": { + "External id": 298849,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9261 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990994.954, "dur": 1.238, + "args": { + "External id": 298850,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9262 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990995.563, "dur": 0.549, + "args": { + "External id": 298851,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9263 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990996.567, "dur": 1.315, + "args": { + "External id": 298852,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9264 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368990997.331, "dur": 0.487, + "args": { + "External id": 298853,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9265 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368990998.179, "dur": 2.890, + "args": { + "External id": 298854,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9266 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991000.154, "dur": 0.848, + "args": { + "External id": 298855,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9267 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991001.346, "dur": 1.646, + "args": { + "External id": 298856,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9268 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991002.112, "dur": 0.691, + "args": { + "External id": 298857,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9269 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991003.236, "dur": 1.495, + "args": { + "External id": 298858,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9270 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991003.917, "dur": 0.514, + "args": { + "External id": 298859,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9271 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991004.986, "dur": 3.118, + "args": { + "External id": 298860,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9272 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991007.391, "dur": 0.642, + "args": { + "External id": 298861,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9273 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991008.390, "dur": 1.553, + "args": { + "External id": 298862,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9274 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991009.216, "dur": 0.552, + "args": { + "External id": 298863,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9275 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991010.213, "dur": 2.173, + "args": { + "External id": 298864,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9276 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991011.075, "dur": 1.241, + "args": { + "External id": 298865,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9277 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991014.484, "dur": 1.785, + "args": { + "External id": 298866,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9278 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991015.517, "dur": 0.682, + "args": { + "External id": 298867,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9279 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991016.518, "dur": 1.762, + "args": { + "External id": 298868,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9280 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991017.320, "dur": 0.891, + "args": { + "External id": 298869,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9281 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991018.544, "dur": 3.110, + "args": { + "External id": 298870,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9282 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991020.833, "dur": 0.749, + "args": { + "External id": 298871,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9283 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991021.926, "dur": 1.392, + "args": { + "External id": 298872,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9284 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991022.736, "dur": 0.512, + "args": { + "External id": 298873,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9285 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991023.555, "dur": 1.678, + "args": { + "External id": 298874,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9286 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991024.349, "dur": 0.724, + "args": { + "External id": 298875,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9287 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991025.471, "dur": 3.771, + "args": { + "External id": 298876,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9288 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991028.266, "dur": 0.904, + "args": { + "External id": 298877,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9289 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991029.481, "dur": 1.701, + "args": { + "External id": 298878,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9290 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991030.518, "dur": 0.592, + "args": { + "External id": 298879,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9291 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991031.417, "dur": 2.946, + "args": { + "External id": 298880,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9292 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991032.326, "dur": 1.965, + "args": { + "External id": 298881,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9293 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991036.227, "dur": 1.588, + "args": { + "External id": 298882,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9294 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991037.073, "dur": 0.675, + "args": { + "External id": 298883,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9295 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991038.086, "dur": 1.643, + "args": { + "External id": 298884,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9296 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991038.851, "dur": 0.806, + "args": { + "External id": 298885,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9297 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991039.974, "dur": 3.517, + "args": { + "External id": 298886,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9298 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991042.885, "dur": 0.534, + "args": { + "External id": 298887,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9299 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991043.808, "dur": 1.727, + "args": { + "External id": 298888,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9300 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991044.433, "dur": 0.923, + "args": { + "External id": 298889,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9301 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991045.816, "dur": 1.941, + "args": { + "External id": 298890,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9302 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991046.895, "dur": 0.783, + "args": { + "External id": 298891,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9303 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991048.041, "dur": 3.016, + "args": { + "External id": 298892,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9304 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991050.432, "dur": 0.450, + "args": { + "External id": 298893,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9305 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991051.296, "dur": 1.670, + "args": { + "External id": 298894,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9306 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991052.071, "dur": 0.815, + "args": { + "External id": 298895,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9307 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991053.206, "dur": 2.369, + "args": { + "External id": 298896,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9308 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991053.779, "dur": 1.723, + "args": { + "External id": 298897,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9309 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991057.339, "dur": 1.445, + "args": { + "External id": 298898,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9310 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991058.202, "dur": 0.500, + "args": { + "External id": 298899,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9311 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991059.235, "dur": 1.640, + "args": { + "External id": 298900,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9312 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991059.880, "dur": 0.930, + "args": { + "External id": 298901,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9313 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991061.126, "dur": 3.543, + "args": { + "External id": 298902,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9314 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991063.808, "dur": 0.788, + "args": { + "External id": 298903,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9315 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991064.959, "dur": 1.780, + "args": { + "External id": 298904,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9316 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991065.789, "dur": 0.784, + "args": { + "External id": 298905,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9317 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991067.003, "dur": 1.562, + "args": { + "External id": 298906,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9318 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991067.756, "dur": 0.741, + "args": { + "External id": 298907,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9319 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991068.810, "dur": 2.970, + "args": { + "External id": 298908,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9320 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991071.107, "dur": 0.605, + "args": { + "External id": 298909,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9321 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991072.033, "dur": 1.489, + "args": { + "External id": 298910,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9322 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991072.840, "dur": 0.601, + "args": { + "External id": 298911,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9323 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991073.757, "dur": 2.963, + "args": { + "External id": 298912,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9324 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991074.888, "dur": 1.758, + "args": { + "External id": 298913,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9325 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991078.696, "dur": 1.547, + "args": { + "External id": 298914,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9326 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991079.357, "dur": 0.817, + "args": { + "External id": 298915,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9327 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991080.586, "dur": 1.824, + "args": { + "External id": 298916,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9328 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991081.488, "dur": 0.858, + "args": { + "External id": 298917,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9329 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991082.654, "dur": 3.291, + "args": { + "External id": 298918,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9330 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991085.317, "dur": 0.556, + "args": { + "External id": 298919,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9331 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991086.211, "dur": 1.689, + "args": { + "External id": 298920,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9332 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991087.037, "dur": 0.797, + "args": { + "External id": 298921,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9333 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991088.181, "dur": 1.513, + "args": { + "External id": 298922,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9334 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991088.797, "dur": 0.831, + "args": { + "External id": 298923,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9335 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991089.938, "dur": 3.088, + "args": { + "External id": 298924,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9336 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991092.146, "dur": 0.595, + "args": { + "External id": 298925,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9337 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991093.266, "dur": 1.369, + "args": { + "External id": 298926,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9338 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991094.015, "dur": 0.550, + "args": { + "External id": 298927,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9339 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991094.897, "dur": 2.614, + "args": { + "External id": 298928,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9340 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991095.677, "dur": 1.762, + "args": { + "External id": 298929,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9341 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991099.261, "dur": 1.312, + "args": { + "External id": 298930,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9342 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991099.919, "dur": 0.579, + "args": { + "External id": 298931,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9343 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991100.808, "dur": 1.957, + "args": { + "External id": 298932,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9344 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991101.841, "dur": 0.860, + "args": { + "External id": 298933,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9345 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991103.035, "dur": 2.966, + "args": { + "External id": 298934,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9346 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991105.414, "dur": 0.505, + "args": { + "External id": 298935,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9347 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991106.261, "dur": 1.338, + "args": { + "External id": 298936,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9348 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991106.970, "dur": 0.560, + "args": { + "External id": 298937,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9349 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991107.879, "dur": 2.031, + "args": { + "External id": 298938,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9350 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991108.622, "dur": 1.013, + "args": { + "External id": 298939,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9351 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991110.168, "dur": 2.804, + "args": { + "External id": 298940,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9352 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991112.239, "dur": 0.571, + "args": { + "External id": 298941,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9353 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991113.290, "dur": 1.562, + "args": { + "External id": 298942,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9354 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991114.257, "dur": 0.524, + "args": { + "External id": 298943,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9355 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991115.104, "dur": 2.628, + "args": { + "External id": 298944,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9356 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991115.679, "dur": 1.776, + "args": { + "External id": 298945,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9357 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991119.765, "dur": 1.619, + "args": { + "External id": 298946,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9358 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991120.632, "dur": 0.682, + "args": { + "External id": 298947,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9359 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991121.711, "dur": 1.774, + "args": { + "External id": 298948,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9360 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991122.716, "dur": 0.705, + "args": { + "External id": 298949,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9361 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991123.728, "dur": 2.576, + "args": { + "External id": 298950,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9362 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991125.697, "dur": 0.527, + "args": { + "External id": 298951,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9363 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991126.577, "dur": 1.867, + "args": { + "External id": 298952,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9364 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991127.438, "dur": 0.934, + "args": { + "External id": 298953,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9365 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991128.691, "dur": 1.661, + "args": { + "External id": 298954,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9366 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991129.644, "dur": 0.644, + "args": { + "External id": 298955,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9367 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991130.610, "dur": 2.574, + "args": { + "External id": 298956,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9368 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991132.690, "dur": 0.430, + "args": { + "External id": 298957,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9369 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991133.456, "dur": 1.506, + "args": { + "External id": 298958,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9370 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991134.192, "dur": 0.702, + "args": { + "External id": 298959,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9371 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991135.203, "dur": 2.518, + "args": { + "External id": 298960,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9372 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991135.860, "dur": 1.782, + "args": { + "External id": 298961,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9373 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::unsqueeze", "pid": 2070552, "tid": 2070552, + "ts": 5333368991139.928, "dur": 1.270, + "args": { + "External id": 298962,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9374 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991140.681, "dur": 0.442, + "args": { + "External id": 298963,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[1]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9375 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::cat", "pid": 2070552, "tid": 2070552, + "ts": 5333368991158.764, "dur": 144.028, + "args": { + "External id": 298964,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9376 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linalg_vector_norm", "pid": 2070552, "tid": 2070552, + "ts": 5333368991403.717, "dur": 120.465, + "args": { + "External id": 298965,"Record function id": 0, "Concrete Inputs": ["", "2.", "", "False", ""], "Input type": ["float", "Scalar", "", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[292], [], [], [], []], "Ev Idx": 9377 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::linalg_vector_norm", "pid": 2070552, "tid": 2070552, + "ts": 5333368991460.011, "dur": 45.018, + "args": { + "External id": 298966,"Record function id": 0, "Concrete Inputs": ["", "2.", "", "False", ""], "Input type": ["float", "Scalar", "", "Scalar", ""], "Input Strides": [[1], [], [], [], []], "Input Dims": [[292], [], [], [], []], "Ev Idx": 9378 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::as_strided", "pid": 2070552, "tid": 2070552, + "ts": 5333368991472.504, "dur": 1.201, + "args": { + "External id": 298967,"Record function id": 0, "Concrete Inputs": ["", "[1]", "[0]", ""], "Input type": ["float", "ScalarList", "ScalarList", ""], "Input Strides": [[], [], [], []], "Input Dims": [[], [], [], []], "Ev Idx": 9379 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "Redistribute", "pid": 2070552, "tid": 2070552, + "ts": 5333368991938.790, "dur": 911.361, + "args": { + "External id": 298968,"Sequence number": 1209237, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", "False"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9380 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::pow", "pid": 2070552, "tid": 2070552, + "ts": 5333368991984.331, "dur": 58.096, + "args": { + "External id": 298969,"Record function id": 0, "Concrete Inputs": ["", "2."], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9381 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368991988.551, "dur": 1.288, + "args": { + "External id": 298970,"Record function id": 0, "Concrete Inputs": ["", "2."], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9382 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368991992.360, "dur": 0.732, + "args": { + "External id": 298971,"Record function id": 0, "Concrete Inputs": ["", "6", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 9383 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "_c10d_functional::all_reduce", "pid": 2070552, "tid": 2070552, + "ts": 5333368992064.613, "dur": 453.252, + "args": { + "External id": 298972,"Record function id": 0, "Concrete Inputs": ["", "", ""], "Input type": ["float", "", ""], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 9384 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clone", "pid": 2070552, "tid": 2070552, + "ts": 5333368992068.942, "dur": 46.582, + "args": { + "External id": 298973,"Record function id": 0, "Concrete Inputs": ["", "0"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9385 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty_like", "pid": 2070552, "tid": 2070552, + "ts": 5333368992072.549, "dur": 11.048, + "args": { + "External id": 298974,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "0"], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9386 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333368992078.426, "dur": 4.396, + "args": { + "External id": 298975,"Record function id": 0, "Concrete Inputs": ["[]", "6", "0", "", "", "0"], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", "Scalar"], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9387 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::copy_", "pid": 2070552, "tid": 2070552, + "ts": 5333368992085.500, "dur": 29.526, + "args": { + "External id": 298976,"Record function id": 0, "Concrete Inputs": ["", "", "False"], "Input type": ["float", "float", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 9388 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "c10d::allreduce_", "pid": 2070552, "tid": 2070552, + "ts": 5333368992124.181, "dur": 390.788, + "args": { + "External id": 298977,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "-1"], "Input type": ["TensorList", "", "", "", "Scalar"], "Input Strides": [[[]], [], [], [], []], "Input Dims": [[[]], [], [], [], []], "Ev Idx": 9389 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333368992154.346, "dur": 354.094, + "args": { + "External id": 298978,"Record function id": 0, "Collective name": "allreduce", "Process Group Description": "default_pg", "dtype": "Float", "Rank": 5, "Input Strides": [[[]], [], [], [], [], [], [], [], [], []], "Concrete Inputs": ["", "", "", "5", "", "[]", "[]", "0", "1", "8"], "Out msg nelems": 1, "Process Group Name": "0", "Input type": ["TensorList", "", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Global rank stride": 1, "Out split size": "[]", "In split size": "[]", "Process Group Ranks": "[0, 1, 2, 3, 4, 5, 6, 7]", "Group size": 8, "Input Dims": [[[]], [], [], [], [], [], [], [], [], []], "Global rank start": 0, "Ev Idx": 9390, "In msg nelems": 1 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "nccl:all_reduce", "pid": 2070552, "tid": 2070552, + "ts": 5333368992188.697, "dur": 314.463, + "args": { + "External id": 298979,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9391 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::pow", "pid": 2070552, "tid": 2070552, + "ts": 5333368992577.946, "dur": 234.227, + "args": { + "External id": 298980,"Record function id": 0, "Concrete Inputs": ["", "0.5"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9392 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "_c10d_functional::wait_tensor", "pid": 2070552, "tid": 2070552, + "ts": 5333368992696.165, "dur": 34.526, + "args": { + "External id": 298981,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9393 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "record_param_comms", "pid": 2070552, "tid": 2070552, + "ts": 5333368992715.535, "dur": 5.119, + "args": { + "External id": 298982,"Record function id": 0, "Collective name": "wait", "Process Group Description": "default_pg", "dtype": "Byte", "Input Dims": [[], [], [], [], [], [], [], [], []], "Input Strides": [[], [], [], [], [], [], [], [], []], "Input type": ["", "", "Scalar", "", "ScalarList", "ScalarList", "Scalar", "Scalar", "Scalar"], "Concrete Inputs": ["", "", "5", "", "[]", "[]", "-1", "-1", "1"], "Out msg nelems": 0, "Process Group Name": "0", "Process Group Ranks": "[]", "Group size": 1, "Ev Idx": 9394, "In msg nelems": 0, "Rank": 5, "In split size": "[]", "Out split size": "[]" + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::pow", "pid": 2070552, "tid": 2070552, + "ts": 5333368992761.549, "dur": 44.404, + "args": { + "External id": 298983,"Record function id": 0, "Concrete Inputs": ["", "0.5"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9395 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368992764.086, "dur": 1.403, + "args": { + "External id": 298984,"Record function id": 0, "Concrete Inputs": ["", "0.5"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9396 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368992767.192, "dur": 0.608, + "args": { + "External id": 298985,"Record function id": 0, "Concrete Inputs": ["", "6", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 9397 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "_ToTorchTensor", "pid": 2070552, "tid": 2070552, + "ts": 5333368992866.370, "dur": 21.669, + "args": { + "External id": 298986,"Sequence number": 1209238, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", ""], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9398 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view_as", "pid": 2070552, "tid": 2070552, + "ts": 5333368992876.213, "dur": 8.569, + "args": { + "External id": 298987,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9399 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::view", "pid": 2070552, "tid": 2070552, + "ts": 5333368992878.678, "dur": 5.919, + "args": { + "External id": 298988,"Record function id": 0, "Concrete Inputs": ["", "[]"], "Input type": ["float", "ScalarList"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9400 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::add", "pid": 2070552, "tid": 2070552, + "ts": 5333368993363.203, "dur": 45.385, + "args": { + "External id": 298989,"Record function id": 0, "Concrete Inputs": ["", "", "1"], "Input type": ["float", "double", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 9401 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::reciprocal", "pid": 2070552, "tid": 2070552, + "ts": 5333368993417.769, "dur": 20.148, + "args": { + "External id": 298990,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9402 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::mul", "pid": 2070552, "tid": 2070552, + "ts": 5333368993445.134, "dur": 27.942, + "args": { + "External id": 298991,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "double"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9403 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::clamp", "pid": 2070552, "tid": 2070552, + "ts": 5333368993488.961, "dur": 23.206, + "args": { + "External id": 298992,"Record function id": 0, "Concrete Inputs": ["", "", "1."], "Input type": ["float", "", "Scalar"], "Input Strides": [[], [], []], "Input Dims": [[], [], []], "Ev Idx": 9404 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368993491.792, "dur": 0.751, + "args": { + "External id": 298993,"Record function id": 0, "Concrete Inputs": ["", "6", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], []], "Input Dims": [[], [], [], [], []], "Ev Idx": 9405 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::to", "pid": 2070552, "tid": 2070552, + "ts": 5333368993529.680, "dur": 0.721, + "args": { + "External id": 298994,"Record function id": 0, "Concrete Inputs": ["", "6", "0", "", "", "False", "False", ""], "Input type": ["float", "Scalar", "Scalar", "", "", "Scalar", "Scalar", ""], "Input Strides": [[], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], []], "Ev Idx": 9406 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_mul_", "pid": 2070552, "tid": 2070552, + "ts": 5333368993682.998, "dur": 833.578, + "args": { + "External id": 298995,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["TensorList", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9407 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_mul_", "pid": 2070552, "tid": 2070552, + "ts": 5333368994185.215, "dur": 299.736, + "args": { + "External id": 298996,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["TensorList", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9408 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::isnan", "pid": 2070552, "tid": 2070552, + "ts": 5333368994561.638, "dur": 35.651, + "args": { + "External id": 298997,"Sequence number": 1209239, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9409 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::ne", "pid": 2070552, "tid": 2070552, + "ts": 5333368994565.558, "dur": 31.172, + "args": { + "External id": 298998,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9410 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2070552, "tid": 2070552, + "ts": 5333368994601.409, "dur": 331.869, + "args": { + "External id": 298999,"Sequence number": 1209239, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9411 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2070552, "tid": 2070552, + "ts": 5333368994603.355, "dur": 329.625, + "args": { + "External id": 299000,"Sequence number": 1209239, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9412 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2070552, "tid": 2070552, + "ts": 5333368994605.102, "dur": 326.767, + "args": { + "External id": 299001,"Sequence number": 1209239, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9413 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::isinf", "pid": 2070552, "tid": 2070552, + "ts": 5333368994941.901, "dur": 63.349, + "args": { + "External id": 299002,"Sequence number": 1209239, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9414 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333368994944.712, "dur": 34.347, + "args": { + "External id": 299003,"Record function id": 0, "Concrete Inputs": [""], "Input type": ["float"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9415 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::empty", "pid": 2070552, "tid": 2070552, + "ts": 5333368994950.106, "dur": 3.264, + "args": { + "External id": 299004,"Record function id": 0, "Concrete Inputs": ["[0]", "6", "0", "", "", ""], "Input type": ["ScalarList", "Scalar", "Scalar", "", "", ""], "Input Strides": [[], [], [], [], [], []], "Input Dims": [[], [], [], [], [], []], "Ev Idx": 9416 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::abs", "pid": 2070552, "tid": 2070552, + "ts": 5333368994955.278, "dur": 23.426, + "args": { + "External id": 299005,"Record function id": 0, "Concrete Inputs": ["", ""], "Input type": ["float", "float"], "Input Strides": [[], [1]], "Input Dims": [[], [0]], "Ev Idx": 9417 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::resize_", "pid": 2070552, "tid": 2070552, + "ts": 5333368994960.711, "dur": 2.861, + "args": { + "External id": 299006,"Record function id": 0, "Concrete Inputs": ["", "[]", ""], "Input type": ["float", "ScalarList", ""], "Input Strides": [[1], [], []], "Input Dims": [[0], [], []], "Ev Idx": 9418 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::eq", "pid": 2070552, "tid": 2070552, + "ts": 5333368994980.695, "dur": 23.639, + "args": { + "External id": 299007,"Record function id": 0, "Concrete Inputs": ["", "inf"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9419 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::is_nonzero", "pid": 2070552, "tid": 2070552, + "ts": 5333368995007.784, "dur": 64.968, + "args": { + "External id": 299008,"Sequence number": 1209239, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9420 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::item", "pid": 2070552, "tid": 2070552, + "ts": 5333368995035.189, "dur": 37.382, + "args": { + "External id": 299009,"Sequence number": 1209239, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9421 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_local_scalar_dense", "pid": 2070552, "tid": 2070552, + "ts": 5333368995036.199, "dur": 35.899, + "args": { + "External id": 299010,"Sequence number": 1209239, "Fwd thread id": 0, "Record function id": 0, "Concrete Inputs": [""], "Input type": ["bool"], "Input Strides": [[]], "Input Dims": [[]], "Ev Idx": 9422 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "Optimizer.step#OptimizersContainer.step", "pid": 2070552, "tid": 2070552, + "ts": 5333368995109.768, "dur": 5545.690, + "args": { + "External id": 299011,"Record function id": 0, "Ev Idx": 9423 + } + }, + { + "ph": "X", "cat": "user_annotation", "name": "Optimizer.step#AdamW.step", "pid": 2070552, "tid": 2070552, + "ts": 5333368995140.535, "dur": 5461.192, + "args": { + "External id": 299012,"Record function id": 0, "Ev Idx": 9424 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_foreach_add_", "pid": 2070552, "tid": 2070552, + "ts": 5333368996318.282, "dur": 250.317, + "args": { + "External id": 299013,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["TensorList", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9425 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996337.788, "dur": 1.282, + "args": { + "External id": 299014,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9426 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996340.455, "dur": 0.068, + "args": { + "External id": 299015,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9427 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996341.028, "dur": 0.180, + "args": { + "External id": 299016,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9428 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996341.809, "dur": 0.172, + "args": { + "External id": 299017,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9429 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996342.401, "dur": 0.071, + "args": { + "External id": 299018,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9430 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996342.990, "dur": 0.132, + "args": { + "External id": 299019,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9431 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996343.593, "dur": 0.222, + "args": { + "External id": 299020,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9432 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996344.218, "dur": 0.209, + "args": { + "External id": 299021,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9433 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996344.827, "dur": 0.087, + "args": { + "External id": 299022,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9434 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996345.274, "dur": 0.067, + "args": { + "External id": 299023,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9435 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996345.840, "dur": 0.071, + "args": { + "External id": 299024,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9436 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996346.305, "dur": 0.200, + "args": { + "External id": 299025,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9437 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996346.866, "dur": 0.202, + "args": { + "External id": 299026,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9438 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996347.659, "dur": 0.097, + "args": { + "External id": 299027,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9439 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996348.042, "dur": 0.205, + "args": { + "External id": 299028,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9440 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996348.601, "dur": 0.098, + "args": { + "External id": 299029,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9441 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996349.039, "dur": 0.231, + "args": { + "External id": 299030,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9442 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996349.696, "dur": 0.062, + "args": { + "External id": 299031,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9443 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996350.203, "dur": 0.177, + "args": { + "External id": 299032,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9444 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996350.804, "dur": 0.216, + "args": { + "External id": 299033,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9445 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996351.460, "dur": 0.057, + "args": { + "External id": 299034,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9446 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996351.825, "dur": 0.102, + "args": { + "External id": 299035,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9447 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996352.371, "dur": 0.069, + "args": { + "External id": 299036,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9448 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996352.876, "dur": 0.064, + "args": { + "External id": 299037,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9449 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996353.349, "dur": 0.063, + "args": { + "External id": 299038,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9450 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996353.753, "dur": 0.063, + "args": { + "External id": 299039,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9451 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996354.267, "dur": 0.066, + "args": { + "External id": 299040,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9452 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996354.707, "dur": 0.069, + "args": { + "External id": 299041,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9453 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996355.087, "dur": 0.065, + "args": { + "External id": 299042,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9454 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996355.560, "dur": 0.065, + "args": { + "External id": 299043,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9455 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996356.307, "dur": 0.064, + "args": { + "External id": 299044,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9456 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996356.946, "dur": 0.066, + "args": { + "External id": 299045,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9457 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996357.465, "dur": 0.064, + "args": { + "External id": 299046,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9458 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996358.075, "dur": 0.065, + "args": { + "External id": 299047,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9459 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996358.645, "dur": 0.067, + "args": { + "External id": 299048,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9460 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996359.358, "dur": 0.054, + "args": { + "External id": 299049,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9461 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996359.844, "dur": 0.063, + "args": { + "External id": 299050,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9462 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996360.202, "dur": 0.065, + "args": { + "External id": 299051,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9463 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996360.571, "dur": 0.064, + "args": { + "External id": 299052,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9464 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996360.999, "dur": 0.066, + "args": { + "External id": 299053,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9465 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996361.513, "dur": 0.050, + "args": { + "External id": 299054,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9466 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996361.915, "dur": 0.061, + "args": { + "External id": 299055,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9467 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996362.310, "dur": 0.063, + "args": { + "External id": 299056,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9468 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996362.813, "dur": 0.065, + "args": { + "External id": 299057,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9469 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996363.222, "dur": 0.064, + "args": { + "External id": 299058,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9470 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996363.573, "dur": 0.066, + "args": { + "External id": 299059,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9471 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996363.953, "dur": 0.065, + "args": { + "External id": 299060,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9472 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996364.383, "dur": 0.064, + "args": { + "External id": 299061,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9473 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996364.707, "dur": 0.070, + "args": { + "External id": 299062,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9474 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996365.105, "dur": 0.062, + "args": { + "External id": 299063,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9475 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996365.710, "dur": 0.069, + "args": { + "External id": 299064,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9476 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996366.155, "dur": 0.064, + "args": { + "External id": 299065,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9477 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996366.539, "dur": 0.059, + "args": { + "External id": 299066,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9478 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996366.947, "dur": 0.059, + "args": { + "External id": 299067,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9479 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996367.332, "dur": 0.065, + "args": { + "External id": 299068,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9480 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996367.713, "dur": 0.080, + "args": { + "External id": 299069,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9481 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996368.325, "dur": 0.060, + "args": { + "External id": 299070,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9482 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996368.652, "dur": 0.068, + "args": { + "External id": 299071,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9483 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996369.127, "dur": 0.056, + "args": { + "External id": 299072,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9484 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996369.504, "dur": 0.066, + "args": { + "External id": 299073,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9485 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996369.912, "dur": 0.061, + "args": { + "External id": 299074,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9486 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996370.299, "dur": 0.065, + "args": { + "External id": 299075,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9487 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996370.743, "dur": 0.063, + "args": { + "External id": 299076,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9488 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996371.150, "dur": 0.066, + "args": { + "External id": 299077,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9489 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996371.514, "dur": 0.066, + "args": { + "External id": 299078,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9490 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996371.993, "dur": 0.069, + "args": { + "External id": 299079,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9491 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996372.498, "dur": 0.067, + "args": { + "External id": 299080,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9492 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996373.038, "dur": 0.063, + "args": { + "External id": 299081,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9493 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996373.572, "dur": 0.067, + "args": { + "External id": 299082,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9494 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996374.140, "dur": 0.065, + "args": { + "External id": 299083,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9495 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996374.783, "dur": 0.066, + "args": { + "External id": 299084,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9496 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996375.453, "dur": 0.065, + "args": { + "External id": 299085,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9497 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996375.796, "dur": 0.066, + "args": { + "External id": 299086,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9498 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996376.259, "dur": 0.064, + "args": { + "External id": 299087,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9499 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996376.741, "dur": 0.154, + "args": { + "External id": 299088,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9500 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996377.250, "dur": 0.066, + "args": { + "External id": 299089,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9501 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996377.745, "dur": 0.066, + "args": { + "External id": 299090,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9502 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996378.272, "dur": 0.202, + "args": { + "External id": 299091,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9503 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996378.858, "dur": 0.225, + "args": { + "External id": 299092,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9504 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996379.383, "dur": 0.237, + "args": { + "External id": 299093,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9505 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996379.941, "dur": 0.235, + "args": { + "External id": 299094,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9506 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996380.551, "dur": 0.056, + "args": { + "External id": 299095,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9507 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996381.000, "dur": 0.067, + "args": { + "External id": 299096,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9508 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996381.458, "dur": 0.067, + "args": { + "External id": 299097,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9509 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996382.009, "dur": 0.056, + "args": { + "External id": 299098,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9510 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996382.383, "dur": 0.068, + "args": { + "External id": 299099,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9511 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996382.779, "dur": 0.066, + "args": { + "External id": 299100,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9512 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996383.211, "dur": 0.062, + "args": { + "External id": 299101,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9513 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996383.625, "dur": 0.068, + "args": { + "External id": 299102,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9514 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996384.102, "dur": 0.064, + "args": { + "External id": 299103,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9515 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996384.496, "dur": 0.064, + "args": { + "External id": 299104,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9516 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996385.032, "dur": 0.066, + "args": { + "External id": 299105,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9517 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996385.447, "dur": 0.062, + "args": { + "External id": 299106,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9518 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996385.818, "dur": 0.066, + "args": { + "External id": 299107,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9519 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996386.251, "dur": 0.068, + "args": { + "External id": 299108,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9520 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996386.686, "dur": 0.066, + "args": { + "External id": 299109,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9521 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996387.176, "dur": 0.067, + "args": { + "External id": 299110,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9522 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996387.627, "dur": 0.066, + "args": { + "External id": 299111,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9523 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996388.142, "dur": 0.173, + "args": { + "External id": 299112,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9524 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996388.613, "dur": 0.201, + "args": { + "External id": 299113,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9525 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996389.154, "dur": 0.069, + "args": { + "External id": 299114,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9526 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996389.542, "dur": 0.203, + "args": { + "External id": 299115,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9527 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996390.058, "dur": 0.091, + "args": { + "External id": 299116,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9528 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996390.539, "dur": 0.192, + "args": { + "External id": 299117,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9529 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996391.018, "dur": 0.082, + "args": { + "External id": 299118,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9530 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996391.758, "dur": 0.064, + "args": { + "External id": 299119,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9531 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996392.204, "dur": 0.179, + "args": { + "External id": 299120,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9532 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996392.658, "dur": 0.175, + "args": { + "External id": 299121,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9533 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996393.280, "dur": 0.176, + "args": { + "External id": 299122,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9534 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996393.796, "dur": 0.064, + "args": { + "External id": 299123,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9535 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996394.257, "dur": 0.067, + "args": { + "External id": 299124,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9536 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996394.807, "dur": 0.065, + "args": { + "External id": 299125,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9537 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996395.229, "dur": 0.061, + "args": { + "External id": 299126,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9538 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996395.579, "dur": 0.065, + "args": { + "External id": 299127,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9539 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996396.083, "dur": 0.067, + "args": { + "External id": 299128,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9540 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996396.682, "dur": 0.059, + "args": { + "External id": 299129,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9541 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996397.201, "dur": 0.066, + "args": { + "External id": 299130,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9542 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996397.696, "dur": 0.068, + "args": { + "External id": 299131,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9543 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996398.204, "dur": 0.060, + "args": { + "External id": 299132,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9544 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996398.730, "dur": 0.066, + "args": { + "External id": 299133,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9545 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996399.144, "dur": 0.065, + "args": { + "External id": 299134,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9546 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996399.489, "dur": 0.061, + "args": { + "External id": 299135,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9547 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996399.994, "dur": 0.061, + "args": { + "External id": 299136,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9548 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996400.430, "dur": 0.061, + "args": { + "External id": 299137,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9549 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996400.823, "dur": 0.067, + "args": { + "External id": 299138,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9550 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996401.237, "dur": 0.066, + "args": { + "External id": 299139,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9551 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996401.671, "dur": 0.069, + "args": { + "External id": 299140,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9552 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996402.169, "dur": 0.054, + "args": { + "External id": 299141,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9553 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996402.547, "dur": 0.070, + "args": { + "External id": 299142,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9554 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996403.122, "dur": 0.065, + "args": { + "External id": 299143,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9555 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996403.505, "dur": 0.066, + "args": { + "External id": 299144,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9556 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996403.851, "dur": 0.066, + "args": { + "External id": 299145,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9557 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996404.290, "dur": 0.208, + "args": { + "External id": 299146,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9558 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996404.907, "dur": 0.098, + "args": { + "External id": 299147,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9559 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996405.396, "dur": 0.185, + "args": { + "External id": 299148,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9560 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996405.973, "dur": 0.198, + "args": { + "External id": 299149,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9561 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996406.669, "dur": 0.191, + "args": { + "External id": 299150,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9562 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996407.236, "dur": 0.066, + "args": { + "External id": 299151,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9563 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996407.585, "dur": 0.066, + "args": { + "External id": 299152,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9564 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996407.948, "dur": 0.065, + "args": { + "External id": 299153,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9565 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996408.433, "dur": 0.210, + "args": { + "External id": 299154,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9566 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996408.924, "dur": 0.200, + "args": { + "External id": 299155,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9567 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996409.490, "dur": 0.083, + "args": { + "External id": 299156,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9568 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996409.873, "dur": 0.105, + "args": { + "External id": 299157,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9569 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996410.336, "dur": 0.190, + "args": { + "External id": 299158,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9570 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996410.860, "dur": 0.068, + "args": { + "External id": 299159,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9571 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996411.296, "dur": 0.067, + "args": { + "External id": 299160,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9572 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996411.724, "dur": 0.063, + "args": { + "External id": 299161,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9573 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996412.138, "dur": 0.068, + "args": { + "External id": 299162,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9574 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996412.465, "dur": 0.064, + "args": { + "External id": 299163,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9575 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996412.978, "dur": 0.062, + "args": { + "External id": 299164,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9576 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996413.352, "dur": 0.384, + "args": { + "External id": 299165,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9577 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996414.194, "dur": 0.219, + "args": { + "External id": 299166,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9578 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996414.845, "dur": 0.064, + "args": { + "External id": 299167,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9579 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996415.363, "dur": 0.205, + "args": { + "External id": 299168,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9580 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996416.004, "dur": 0.063, + "args": { + "External id": 299169,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9581 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996416.366, "dur": 0.067, + "args": { + "External id": 299170,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9582 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996416.869, "dur": 0.067, + "args": { + "External id": 299171,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9583 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996417.253, "dur": 0.068, + "args": { + "External id": 299172,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9584 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996417.789, "dur": 0.090, + "args": { + "External id": 299173,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9585 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996418.186, "dur": 0.070, + "args": { + "External id": 299174,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9586 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996418.679, "dur": 0.067, + "args": { + "External id": 299175,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9587 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996419.193, "dur": 0.052, + "args": { + "External id": 299176,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9588 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996419.731, "dur": 0.062, + "args": { + "External id": 299177,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9589 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996420.244, "dur": 0.067, + "args": { + "External id": 299178,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9590 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996420.682, "dur": 0.065, + "args": { + "External id": 299179,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9591 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996421.060, "dur": 0.069, + "args": { + "External id": 299180,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9592 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996421.444, "dur": 0.065, + "args": { + "External id": 299181,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9593 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996421.840, "dur": 0.067, + "args": { + "External id": 299182,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9594 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996422.226, "dur": 0.064, + "args": { + "External id": 299183,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9595 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996422.570, "dur": 0.064, + "args": { + "External id": 299184,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9596 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996429.793, "dur": 0.077, + "args": { + "External id": 299185,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9597 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996431.095, "dur": 0.067, + "args": { + "External id": 299186,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9598 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996431.560, "dur": 0.068, + "args": { + "External id": 299187,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9599 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996431.982, "dur": 0.068, + "args": { + "External id": 299188,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9600 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996432.465, "dur": 0.063, + "args": { + "External id": 299189,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9601 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996432.892, "dur": 0.071, + "args": { + "External id": 299190,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9602 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996433.325, "dur": 0.086, + "args": { + "External id": 299191,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9603 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996433.708, "dur": 0.067, + "args": { + "External id": 299192,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9604 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996434.144, "dur": 0.064, + "args": { + "External id": 299193,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9605 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996434.492, "dur": 0.063, + "args": { + "External id": 299194,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9606 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996434.955, "dur": 0.051, + "args": { + "External id": 299195,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9607 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996435.388, "dur": 0.052, + "args": { + "External id": 299196,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9608 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996435.983, "dur": 0.066, + "args": { + "External id": 299197,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9609 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996436.524, "dur": 0.061, + "args": { + "External id": 299198,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9610 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996437.028, "dur": 0.061, + "args": { + "External id": 299199,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9611 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996437.453, "dur": 0.067, + "args": { + "External id": 299200,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9612 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996437.875, "dur": 0.061, + "args": { + "External id": 299201,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9613 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996438.304, "dur": 0.067, + "args": { + "External id": 299202,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9614 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996438.669, "dur": 0.065, + "args": { + "External id": 299203,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9615 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996439.086, "dur": 0.070, + "args": { + "External id": 299204,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9616 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996439.520, "dur": 0.069, + "args": { + "External id": 299205,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9617 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996439.901, "dur": 0.057, + "args": { + "External id": 299206,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9618 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996440.354, "dur": 0.054, + "args": { + "External id": 299207,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9619 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996440.965, "dur": 0.067, + "args": { + "External id": 299208,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9620 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996441.302, "dur": 0.062, + "args": { + "External id": 299209,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9621 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996441.720, "dur": 0.065, + "args": { + "External id": 299210,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9622 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996442.168, "dur": 0.063, + "args": { + "External id": 299211,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9623 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996442.603, "dur": 0.064, + "args": { + "External id": 299212,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9624 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996443.148, "dur": 0.068, + "args": { + "External id": 299213,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9625 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996443.556, "dur": 0.064, + "args": { + "External id": 299214,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9626 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996444.058, "dur": 0.067, + "args": { + "External id": 299215,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9627 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996444.419, "dur": 0.072, + "args": { + "External id": 299216,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9628 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996444.835, "dur": 0.066, + "args": { + "External id": 299217,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9629 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996445.336, "dur": 0.066, + "args": { + "External id": 299218,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9630 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996445.982, "dur": 0.069, + "args": { + "External id": 299219,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9631 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996446.484, "dur": 0.061, + "args": { + "External id": 299220,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9632 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996446.862, "dur": 0.059, + "args": { + "External id": 299221,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9633 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996447.222, "dur": 0.093, + "args": { + "External id": 299222,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9634 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996447.642, "dur": 0.067, + "args": { + "External id": 299223,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9635 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996448.087, "dur": 0.069, + "args": { + "External id": 299224,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9636 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996448.433, "dur": 0.066, + "args": { + "External id": 299225,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9637 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996448.931, "dur": 0.062, + "args": { + "External id": 299226,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9638 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996449.274, "dur": 0.064, + "args": { + "External id": 299227,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9639 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996449.737, "dur": 0.055, + "args": { + "External id": 299228,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9640 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996450.081, "dur": 0.065, + "args": { + "External id": 299229,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9641 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996450.516, "dur": 0.282, + "args": { + "External id": 299230,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9642 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996451.127, "dur": 0.065, + "args": { + "External id": 299231,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9643 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996451.495, "dur": 0.084, + "args": { + "External id": 299232,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9644 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996451.948, "dur": 0.067, + "args": { + "External id": 299233,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9645 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996452.342, "dur": 0.076, + "args": { + "External id": 299234,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9646 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996452.742, "dur": 0.229, + "args": { + "External id": 299235,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9647 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996453.327, "dur": 0.211, + "args": { + "External id": 299236,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9648 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996453.807, "dur": 0.101, + "args": { + "External id": 299237,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9649 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996454.267, "dur": 0.226, + "args": { + "External id": 299238,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9650 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996454.851, "dur": 0.070, + "args": { + "External id": 299239,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9651 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996455.304, "dur": 0.186, + "args": { + "External id": 299240,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9652 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996455.809, "dur": 0.065, + "args": { + "External id": 299241,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9653 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996456.208, "dur": 0.065, + "args": { + "External id": 299242,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9654 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996456.679, "dur": 0.069, + "args": { + "External id": 299243,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9655 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996457.233, "dur": 0.067, + "args": { + "External id": 299244,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9656 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996457.685, "dur": 0.071, + "args": { + "External id": 299245,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9657 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996458.075, "dur": 0.065, + "args": { + "External id": 299246,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9658 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996458.478, "dur": 0.066, + "args": { + "External id": 299247,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9659 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996458.800, "dur": 0.069, + "args": { + "External id": 299248,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9660 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996459.144, "dur": 0.067, + "args": { + "External id": 299249,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9661 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996459.560, "dur": 0.064, + "args": { + "External id": 299250,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9662 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996459.904, "dur": 0.067, + "args": { + "External id": 299251,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9663 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996460.342, "dur": 0.069, + "args": { + "External id": 299252,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9664 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996460.755, "dur": 0.067, + "args": { + "External id": 299253,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9665 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996461.188, "dur": 0.065, + "args": { + "External id": 299254,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9666 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996461.629, "dur": 0.087, + "args": { + "External id": 299255,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9667 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996462.036, "dur": 0.066, + "args": { + "External id": 299256,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9668 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996462.858, "dur": 0.065, + "args": { + "External id": 299257,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9669 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996463.525, "dur": 0.066, + "args": { + "External id": 299258,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9670 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996463.889, "dur": 0.069, + "args": { + "External id": 299259,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9671 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996464.404, "dur": 0.062, + "args": { + "External id": 299260,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9672 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996464.846, "dur": 0.070, + "args": { + "External id": 299261,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9673 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996465.399, "dur": 0.066, + "args": { + "External id": 299262,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9674 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996465.935, "dur": 0.067, + "args": { + "External id": 299263,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9675 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996466.342, "dur": 0.065, + "args": { + "External id": 299264,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9676 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996466.744, "dur": 0.064, + "args": { + "External id": 299265,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9677 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996467.105, "dur": 0.063, + "args": { + "External id": 299266,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9678 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996467.546, "dur": 0.067, + "args": { + "External id": 299267,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9679 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996467.992, "dur": 0.070, + "args": { + "External id": 299268,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9680 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996468.440, "dur": 0.068, + "args": { + "External id": 299269,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9681 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996468.895, "dur": 0.068, + "args": { + "External id": 299270,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9682 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996469.277, "dur": 0.063, + "args": { + "External id": 299271,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9683 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996469.711, "dur": 0.065, + "args": { + "External id": 299272,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9684 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996470.159, "dur": 0.067, + "args": { + "External id": 299273,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9685 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996470.538, "dur": 0.183, + "args": { + "External id": 299274,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9686 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996471.002, "dur": 0.217, + "args": { + "External id": 299275,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9687 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996471.506, "dur": 0.199, + "args": { + "External id": 299276,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9688 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996472.131, "dur": 0.215, + "args": { + "External id": 299277,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9689 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996472.709, "dur": 0.206, + "args": { + "External id": 299278,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9690 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996473.207, "dur": 0.065, + "args": { + "External id": 299279,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9691 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996473.751, "dur": 0.181, + "args": { + "External id": 299280,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9692 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996474.308, "dur": 0.071, + "args": { + "External id": 299281,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9693 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996474.831, "dur": 0.067, + "args": { + "External id": 299282,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9694 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996475.238, "dur": 0.055, + "args": { + "External id": 299283,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9695 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996475.682, "dur": 0.064, + "args": { + "External id": 299284,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9696 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996476.029, "dur": 0.050, + "args": { + "External id": 299285,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9697 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996476.468, "dur": 0.197, + "args": { + "External id": 299286,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9698 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996476.945, "dur": 0.053, + "args": { + "External id": 299287,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9699 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996477.312, "dur": 0.064, + "args": { + "External id": 299288,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9700 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996477.614, "dur": 0.054, + "args": { + "External id": 299289,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9701 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996478.246, "dur": 0.065, + "args": { + "External id": 299290,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9702 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996478.552, "dur": 0.053, + "args": { + "External id": 299291,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9703 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996478.995, "dur": 0.070, + "args": { + "External id": 299292,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9704 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996479.306, "dur": 0.052, + "args": { + "External id": 299293,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9705 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996479.806, "dur": 0.064, + "args": { + "External id": 299294,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9706 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996480.146, "dur": 0.056, + "args": { + "External id": 299295,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9707 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996480.535, "dur": 0.067, + "args": { + "External id": 299296,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9708 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996481.054, "dur": 0.065, + "args": { + "External id": 299297,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9709 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996481.595, "dur": 0.063, + "args": { + "External id": 299298,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9710 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996486.185, "dur": 0.072, + "args": { + "External id": 299299,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9711 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996486.652, "dur": 0.066, + "args": { + "External id": 299300,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9712 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996487.020, "dur": 0.056, + "args": { + "External id": 299301,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9713 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996487.501, "dur": 0.066, + "args": { + "External id": 299302,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9714 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996487.812, "dur": 0.054, + "args": { + "External id": 299303,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9715 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996488.322, "dur": 0.282, + "args": { + "External id": 299304,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9716 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::result_type", "pid": 2070552, "tid": 2070552, + "ts": 5333368996489.058, "dur": 0.080, + "args": { + "External id": 299305,"Record function id": 0, "Concrete Inputs": ["", "1"], "Input type": ["float", "Scalar"], "Input Strides": [[], []], "Input Dims": [[], []], "Ev Idx": 9717 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_fused_adamw_", "pid": 2070552, "tid": 2070552, + "ts": 5333368997068.190, "dur": 3442.486, + "args": { + "External id": 299306,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "3.6769426533212504e-05", "0.90000000000000002", "0.94999999999999996", "0.10000000000000001", "1.0000000000000001e-15", "False", "False", "", ""], "Input type": ["TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9718 + } + }, + { + "ph": "X", "cat": "cpu_op", "name": "aten::_fused_adamw_", "pid": 2070552, "tid": 2070552, + "ts": 5333369000018.505, "dur": 340.759, + "args": { + "External id": 299307,"Record function id": 0, "Concrete Inputs": ["", "", "", "", "", "", "3.6769426533212504e-05", "0.90000000000000002", "0.94999999999999996", "0.10000000000000001", "1.0000000000000001e-15", "False", "False", "", ""], "Input type": ["TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "TensorList", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "Scalar", "", ""], "Input Strides": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Input Dims": [[], [], [], [], [], [], [], [], [], [], [], [], [], [], []], "Ev Idx": 9719 + } + }, + { + "name": "process_name", "ph": "M", "ts": 5333366952227.145, "pid": 2070552, "tid": 0, + "args": { + "name": "python3.12" + } + }, + { + "name": "process_labels", "ph": "M", "ts": 5333366952227.145, "pid": 2070552, "tid": 0, + "args": { + "labels": "CPU" + } + }, + { + "name": "process_sort_index", "ph": "M", "ts": 5333366952227.145, "pid": 2070552, "tid": 0, + "args": { + "sort_index": 2070552 + } + }, + { + "name": "thread_name", "ph": "M", "ts": 5333366952227.145, "pid": 2070552, "tid": 2070552, + "args": { + "name": "thread 2070552 (python3.12)" + } + }, + { + "name": "thread_sort_index", "ph": "M", "ts": 5333366952227.145, "pid": 2070552, "tid": 2070552, + "args": { + "sort_index": 2070552 + } + }, + { + "name": "thread_name", "ph": "M", "ts": 5333366952227.145, "pid": 2070552, "tid": 2107648, + "args": { + "name": "thread 2107648 (python3.12)" + } + }, + { + "name": "thread_sort_index", "ph": "M", "ts": 5333366952227.145, "pid": 2070552, "tid": 2107648, + "args": { + "sort_index": 2107648 + } + }, + { + "name": "thread_name", "ph": "M", "ts": 5333366952227.145, "pid": 2070552, "tid": 2107648, + "args": { + "name": "thread 2107648 (pt_autograd_5)" + } + }, + { + "name": "thread_sort_index", "ph": "M", "ts": 5333366952227.145, "pid": 2070552, "tid": 2107648, + "args": { + "sort_index": 2107648 + } + }, + { + "ph": "X", "cat": "Trace", "ts": 5333366952146.619, "dur": 2050254.692, + "pid": "Spans", "tid": "PyTorch Profiler", + "name": "PyTorch Profiler (0)", + "args": { + "Op count": 0 + } + }, + { + "name": "process_sort_index", "ph": "M", "ts": 5333366952146.619, + "pid": "Spans", "tid": 0, + "args": { + "sort_index": 536870912 + } + }, + { + "name": "Iteration Start: PyTorch Profiler", "ph": "i", "s": "g", + "pid": "Traces", "tid": "Trace PyTorch Profiler", "ts": 5333366952146.619 + }, + { + "name": "Record Window End", "ph": "i", "s": "g", + "pid": "", "tid": "", "ts": 5333369045706.910 + } + ], + "traceName": "exp/top.code.1B.batch16.seqlen4096.context4096.warmup400.update1.steps40000.lr5e-5.cosine/profile_trace/iteration_14848/rank5_trace.json", + "displayTimeUnit": "ms", + "baseTimeNanoseconds": 1751410836000000000 +} \ No newline at end of file